# This submission to NSS is to be made available under the terms of the
# Mozilla Public License, v. 2.0. You can obtain one at //mozilla.org/MPL/2.0/
# Copyright(c) 2021, Niels Möller and Mamone Tarsha

# AES-GCM acceleration for POWER8+ (ppc64le, ELFv2 ABI, GAS syntax).
# Uses VSX loads/stores (lxvd2x/stxvd2x), the AES instructions
# (vcipher/vcipherlast) and carry-less multiply (vpmsumd) for GHASH.
# Non-volatile registers are saved in the red zone below the stack
# pointer (ELFv2 provides a protected area below SP), so no stack
# frame is established.

# Registers:

.set SP, 1                              # stack pointer (r1)
.set TOCP, 2                            # TOC pointer (r2)

# Load a 16-byte constant DATA from the GOT into vector register VR,
# using GPR as scratch for the address.
.macro VEC_LOAD_DATA VR, DATA, GPR
    addis \GPR, 2, \DATA@got@ha
    ld \GPR, \DATA@got@l(\GPR)
    lvx \VR, 0, \GPR
.endm

# Load 16 bytes at GPR+IDX into VR.  lxvd2x targets VSR \VR+32 (the VR
# half of the VSX register file); the vperm with SWAP_MASK swaps the two
# doublewords so the data ends up in big-endian lane order on LE.
.macro VEC_LOAD VR, GPR, IDX
    lxvd2x \VR+32, \IDX, \GPR
    vperm \VR, \VR, \VR, SWAP_MASK
.endm

# Same as VEC_LOAD, but also advances IDX by 16 bytes.
.macro VEC_LOAD_INC VR, GPR, IDX
    lxvd2x \VR+32, \IDX, \GPR
    addi \IDX,\IDX,16
    vperm \VR, \VR, \VR, SWAP_MASK
.endm

# Store VR to GPR+IDX, undoing the doubleword swap first.
# NOTE(review): destructively permutes VR before the store.
.macro VEC_STORE VR, GPR, IDX
    vperm \VR, \VR, \VR, SWAP_MASK
    stxvd2x \VR+32, \IDX, \GPR
.endm

# 0 < LEN < 16, pad the remaining bytes with zeros
# Loads LEN bytes from DATA into the GPR pair VAL1:VAL0 (byte-reversed
# doubleword first via ldbrx, then remaining bytes assembled MSB-first).
# TMP0 = running byte offset, TMP1 = shift count / scratch, TMP2 = byte.
.macro LOAD_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2
    li \TMP0, 0
    li \VAL1, 0
    li \VAL0, 0
    andi. \TMP1, \LEN, 8
    beq 1f
    # at least 8 bytes: load the first doubleword byte-reversed
    ldbrx \VAL1, 0, \DATA
    li \TMP0, 8
1:
    andi. \TMP1, \LEN, 7
    beq 3f
    li \TMP1, 56
2:
    # fold each remaining byte into VAL0, most-significant first
    lbzx \TMP2, \TMP0, \DATA
    sld \TMP2, \TMP2, \TMP1
    subi \TMP1, \TMP1, 8
    or \VAL0, \VAL0, \TMP2
    addi \TMP0, \TMP0, 1
    cmpld \TMP0, \LEN
    bne 2b
    andi. \TMP1, \LEN, 8
    bne 3f
    # LEN < 8: the partial word belongs in VAL1, low half stays zero
    mr \VAL1, \VAL0
    li \VAL0, 0
3:
.endm

# 0 < LEN < 16
# Stores LEN bytes from the GPR pair VAL1:VAL0 to DATA; mirror of
# LOAD_LEN (stdbrx for a full doubleword, then byte-by-byte MSB-first).
.macro STORE_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2
    andi. \TMP1, \LEN, 8
    beq 1f
    stdbrx \VAL1, 0, \DATA
    li \TMP0, 8
    b 2f
1:
    li \TMP0, 0
    mr \VAL0, \VAL1
2:
    andi. \TMP1, \LEN, 7
    beq 4f
    li \TMP1, 56
3:
    srd \TMP2, \VAL0, \TMP1
    subi \TMP1, \TMP1, 8
    stbx \TMP2, \TMP0, \DATA
    addi \TMP0, \TMP0, 1
    cmpld \TMP0, \LEN
    bne 3b
4:
.endm

.text

################################################################################
# Generates the H table
# void ppc_aes_gcmINIT(uint8_t Htbl[16*8], uint32_t *KS, int NR);
#
# Encrypts the zero block with the expanded key KS (NR = number of AES
# rounds: 10/12/14) to obtain the hash key H, then derives H^2, H^3, H^4
# and stores each power as a (low, high) pair of halves into Htbl.
.globl ppc_aes_gcmINIT
.type ppc_aes_gcmINIT,@function
.align 5
ppc_aes_gcmINIT:
addis TOCP,12,(.TOC.-ppc_aes_gcmINIT)@ha
addi TOCP,TOCP,(.TOC.-ppc_aes_gcmINIT)@l
.localentry ppc_aes_gcmINIT, .-ppc_aes_gcmINIT

# argument registers
.set Htbl, 3
.set KS, 4
.set NR, 5

# vector registers; ZERO/MSB/ONE deliberately alias H3L/H4M/H4L
# (v17-v19) — their lifetimes do not overlap.
.set ZERO, 19
.set MSB, 18
.set ONE, 17
.set SWAP_MASK, 0
.set POLY, 1
.set K, 2
.set H, 3
.set H2, 4
.set H3, 5
.set H4, 6
.set HP, 7
.set HS, 8
.set R, 9
.set F, 10
.set T, 11
.set H1M, 12
.set H1L, 13
.set H2M, 14
.set H2L, 15
.set H3M, 16
.set H3L, 17
.set H4M, 18
.set H4L, 19

    VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 6
    VEC_LOAD_DATA POLY, .Lpoly, 6

    # H = AES_encrypt(0): start from the whitening key, then run 9, 11
    # or 13 middle rounds depending on NR, plus the final round below.
    li 6, 0
    VEC_LOAD_INC H, KS, 6
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    cmpwi NR, 10
    beq .LH_done
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    cmpwi NR, 12
    beq .LH_done
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K

.LH_done:
    VEC_LOAD K, KS, 6
    vcipherlast H, H, K

    # H = H << 1 mod the GCM polynomial: replicate the sign byte,
    # shift left by one bit, and conditionally xor in POLY.
    vupkhsb MSB, H
    vspltisb ONE, 1
    vspltb MSB, MSB, 0
    vsl H, H, ONE
    vand MSB, MSB, POLY
    vxor ZERO, ZERO, ZERO
    vxor H, H, MSB
    vsldoi POLY, ZERO, POLY, 8

    # split H into the (H1M, H1L) halves used by the multiply routines
    vpmsumd HP, H, POLY
    vsldoi HS, H, H, 8
    vxor HP, HP, HS
    vsldoi H1L, HP, HS, 8
    vsldoi H1M, HS, HP, 8
    vsldoi H1L, H1L, H1L, 8

    # calculate H^2

    vpmsumd F, H, H1L
    vpmsumd R, H, H1M

    # reduction
    vpmsumd T, F, POLY
    vsldoi H2, F, F, 8
    vxor R, R, T
    vxor H2, H2, R

    vpmsumd HP, H2, POLY
    vsldoi HS, H2, H2, 8
    vxor HP, HP, HS
    vsldoi H2L, HP, HS, 8
    vsldoi H2M, HS, HP, 8
    vsldoi H2L, H2L, H2L, 8

    # calculate H^3

    vpmsumd F, H2, H1L
    vpmsumd R, H2, H1M

    vpmsumd T, F, POLY
    vsldoi H3, F, F, 8
    vxor R, R, T
    vxor H3, H3, R

    vpmsumd HP, H3, POLY
    vsldoi HS, H3, H3, 8
    vxor HP, HP, HS
    vsldoi H3L, HP, HS, 8
    vsldoi H3M, HS, HP, 8
    vsldoi H3L, H3L, H3L, 8

    # calculate H^4 = (H^2)^2

    vpmsumd F, H2, H2L
    vpmsumd R, H2, H2M

    vpmsumd T, F, POLY
    vsldoi H4, F, F, 8
    vxor R, R, T
    vxor H4, H4, R

    vpmsumd HP, H4, POLY
    vsldoi HS, H4, H4, 8
    vxor HP, HP, HS
    vsldoi H4L, HP, HS, 8
    vsldoi H4M, HS, HP, 8
    vsldoi H4L, H4L, H4L, 8

    # Htbl layout: H1L, H1M, H2L, H2M, H3L, H3M, H4L, H4M at 16-byte strides
    li 8, 16*1
    li 9, 16*2
    li 10, 16*3
    stxvd2x H1L+32, 0, Htbl
    stxvd2x H1M+32, 8, Htbl
    stxvd2x H2L+32, 9, Htbl
    stxvd2x H2M+32, 10, Htbl
    li 7, 16*4
    li 8, 16*5
    li 9, 16*6
    li 10, 16*7
    stxvd2x H3L+32, 7, Htbl
    stxvd2x H3M+32, 8, Htbl
    stxvd2x H4L+32, 9, Htbl
    stxvd2x H4M+32, 10, Htbl

    blr
.size ppc_aes_gcmINIT, . - ppc_aes_gcmINIT

################################################################################
# Authenticate only
# void ppc_aes_gcmHASH(uint8_t Htbl[16*8], uint8_t *AAD, uint64_t Alen, uint8_t *Tp);
#
# Folds Alen bytes of AAD into the running GHASH digest at Tp, processing
# 4 blocks at a time (using H^1..H^4 for deferred recombination), then
# 2 blocks, then 1, then a zero-padded partial block.
.globl ppc_aes_gcmHASH
.type ppc_aes_gcmHASH,@function
.align 5
ppc_aes_gcmHASH:
addis TOCP,12,(.TOC.-ppc_aes_gcmHASH)@ha
addi TOCP,TOCP,(.TOC.-ppc_aes_gcmHASH)@l
.localentry ppc_aes_gcmHASH, .-ppc_aes_gcmHASH

# argument registers
.set Htbl, 3
.set AAD, 4
.set Alen, 5
.set Tp, 6

# vector registers
.set SWAP_MASK, 0
.set POLY, 1
.set D, 2                               # running digest
.set C0, 3
.set C1, 4
.set C2, 5
.set C3, 6
.set T, 7
.set R, 8
.set F, 9
.set R2, 10
.set F2, 11
.set R3, 12
.set F3, 13
.set R4, 14
.set F4, 15
.set H1M, 16
.set H1L, 17
.set H2M, 18
.set H2L, 19
.set H3M, 28
.set H3L, 29
.set H4M, 30
.set H4L, 31

    # store non-volatile vector registers (v28-v31) in the red zone
    addi 7, SP, -16
    stvx 31, 0, 7
    addi 7, SP, -32
    stvx 30, 0, 7
    addi 7, SP, -48
    stvx 29, 0, 7
    addi 7, SP, -64
    stvx 28, 0, 7

    VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 7
    VEC_LOAD_DATA POLY, .Lpoly_r, 7

    # load the current digest
    VEC_LOAD D, Tp, 0

    # --- process 4 blocks ---

    srdi. 7, Alen, 6                    # 4-blocks loop count
    beq .L2x

    mtctr 7                             # set counter register

    # load table elements
    li 8, 1*16
    li 9, 2*16
    li 10, 3*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl
    lxvd2x H2L+32, 9, Htbl
    lxvd2x H2M+32, 10, Htbl
    li 7, 4*16
    li 8, 5*16
    li 9, 6*16
    li 10, 7*16
    lxvd2x H3L+32, 7, Htbl
    lxvd2x H3M+32, 8, Htbl
    lxvd2x H4L+32, 9, Htbl
    lxvd2x H4M+32, 10, Htbl

    li 8, 0x10
    li 9, 0x20
    li 10, 0x30
.align 5
.L4x_loop:
    # load input
    lxvd2x C0+32, 0, AAD
    lxvd2x C1+32, 8, AAD
    lxvd2x C2+32, 9, AAD
    lxvd2x C3+32, 10, AAD

    vperm C0, C0, C0, SWAP_MASK
    vperm C1, C1, C1, SWAP_MASK
    vperm C2, C2, C2, SWAP_MASK
    vperm C3, C3, C3, SWAP_MASK

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication: block i is multiplied by H^(4-i)
    vpmsumd F2, H3L, C1
    vpmsumd R2, H3M, C1
    vpmsumd F3, H2L, C2
    vpmsumd R3, H2M, C2
    vpmsumd F4, H1L, C3
    vpmsumd R4, H1M, C3
    vpmsumd F, H4L, C0
    vpmsumd R, H4M, C0

    # deferred recombination of partial products
    vxor F3, F3, F4
    vxor R3, R3, R4
    vxor F, F, F2
    vxor R, R, R2
    vxor F, F, F3
    vxor R, R, R3

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    addi AAD, AAD, 0x40
    bdnz .L4x_loop

    clrldi Alen, Alen, 58               # Alen &= 63
.L2x:
    # --- process 2 blocks ---

    srdi. 7, Alen, 5
    beq .L1x

    # load table elements
    li 8, 1*16
    li 9, 2*16
    li 10, 3*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl
    lxvd2x H2L+32, 9, Htbl
    lxvd2x H2M+32, 10, Htbl

    # load input
    li 10, 0x10
    lxvd2x C0+32, 0, AAD
    lxvd2x C1+32, 10, AAD

    vperm C0, C0, C0, SWAP_MASK
    vperm C1, C1, C1, SWAP_MASK

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F2, H1L, C1
    vpmsumd R2, H1M, C1
    vpmsumd F, H2L, C0
    vpmsumd R, H2M, C0

    # deferred recombination of partial products
    vxor F, F, F2
    vxor R, R, R2

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    addi AAD, AAD, 0x20
    clrldi Alen, Alen, 59               # Alen &= 31
.L1x:
    # --- process 1 block ---

    srdi. 7, Alen, 4
    beq .Ltail

    # load table elements
    li 8, 1*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl

    # load input
    lxvd2x C0+32, 0, AAD

    vperm C0, C0, C0, SWAP_MASK

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F, H1L, C0
    vpmsumd R, H1M, C0

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    addi AAD, AAD, 0x10
    clrldi Alen, Alen, 60               # Alen &= 15

.Ltail:
    cmpldi Alen, 0
    beq .Lh_done
    # --- process the final partial block ---

    # load table elements
    li 8, 1*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl

    # zero-pad the partial block into r10:r9, then move to C0
    # (clobbers r3 = Htbl, which is no longer needed)
    LOAD_LEN AAD, Alen, 10, 9, 3, 7, 8
    mtvrd C0, 10
    mtvrd C1, 9
    xxmrghd C0+32, C0+32, C1+32

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F, H1L, C0
    vpmsumd R, H1M, C0

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D
.Lh_done:
    VEC_STORE D, Tp, 0

    # restore non-volatile vector registers
    addi 7, SP, -16
    lvx 31, 0, 7
    addi 7, SP, -32
    lvx 30, 0, 7
    addi 7, SP, -48
    lvx 29, 0, 7
    addi 7, SP, -64
    lvx 28, 0, 7
    blr
.size ppc_aes_gcmHASH, . - ppc_aes_gcmHASH

################################################################################
# Generates the final GCM tag
# void ppc_aes_gcmTAG(uint8_t Htbl[16*8], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG);
#
# Folds the length block (Alen || Mlen, in bits) into the digest at Tp,
# xors with the encrypted first counter block X0 and writes TAG.
.globl ppc_aes_gcmTAG
.type ppc_aes_gcmTAG,@function
.align 5
ppc_aes_gcmTAG:
addis TOCP,12,(.TOC.-ppc_aes_gcmTAG)@ha
addi TOCP,TOCP,(.TOC.-ppc_aes_gcmTAG)@l
.localentry ppc_aes_gcmTAG, .-ppc_aes_gcmTAG

# argument registers
.set Htbl, 3
.set Tp, 4
.set Mlen, 5
.set Alen, 6
.set X0, 7
.set TAG, 8

# vector registers
.set SWAP_MASK, 0
.set POLY, 1
.set D, 2
.set C0, 3
.set C1, 4
.set T, 5
.set R, 6
.set F, 7
.set H1M, 8
.set H1L, 9
.set X, 10

    VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9
    VEC_LOAD_DATA POLY, .Lpoly_r, 9

    VEC_LOAD D, Tp, 0

    # load table elements
    li 9, 1*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 9, Htbl

    # build the length block: byte counts -> bit counts, Alen || Mlen
    sldi Alen, Alen, 3
    sldi Mlen, Mlen, 3
    mtvrd C0, Alen
    mtvrd C1, Mlen
    xxmrghd C0+32, C0+32, C1+32

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F, H1L, C0
    vpmsumd R, H1M, C0

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    # TAG = X0 ^ byteswap(D)
    lxvd2x X+32, 0, X0
    vperm D, D, D, SWAP_MASK
    vxor X, X, D
    stxvd2x X+32, 0, TAG

    blr
.size ppc_aes_gcmTAG, . - ppc_aes_gcmTAG

################################################################################
# Crypt only
# void ppc_aes_gcmCRYPT(const uint8_t* PT, uint8_t* CT, uint64_t LEN, uint8_t *CTRP, uint32_t *KS, int NR);
#
# CTR-mode encryption: processes 8, then 4, then 2, then 1 block(s) per
# iteration, plus a final partial block.  The counter at CTRP is updated
# and written back.  NR is the AES round count (10/12/14).
.globl ppc_aes_gcmCRYPT
.type ppc_aes_gcmCRYPT,@function
.align 5
ppc_aes_gcmCRYPT:
addis TOCP,12,(.TOC.-ppc_aes_gcmCRYPT)@ha
addi TOCP,TOCP,(.TOC.-ppc_aes_gcmCRYPT)@l
.localentry ppc_aes_gcmCRYPT, .-ppc_aes_gcmCRYPT

# argument registers
.set PT, 3
.set CT, 4
.set LEN, 5
.set CTRP, 6
.set KS, 7
.set NR, 8

# vector registers
.set SWAP_MASK, 0
.set K, 1
.set CTR, 2
.set CTR0, 3
.set CTR1, 4
.set CTR2, 5
.set CTR3, 6
.set CTR4, 7
.set CTR5, 8
.set CTR6, 9
.set CTR7, 10
.set ZERO, 11
.set I1, 12                             # counter increments 1..8
.set I2, 13
.set I3, 14
.set I4, 15
.set I5, 16
.set I6, 17
.set I7, 18
.set I8, 19
.set IN0, 24
.set IN1, 25
.set IN2, 26
.set IN3, 27
.set IN4, 28
.set IN5, 29
.set IN6, 30
.set IN7, 31

# One AES round over 8/4/2/1 counter blocks; each loads the next round
# key via r10 (which VEC_LOAD_INC advances by 16).
.macro ROUND_8
    VEC_LOAD_INC K, KS, 10
    vcipher CTR0, CTR0, K
    vcipher CTR1, CTR1, K
    vcipher CTR2, CTR2, K
    vcipher CTR3, CTR3, K
    vcipher CTR4, CTR4, K
    vcipher CTR5, CTR5, K
    vcipher CTR6, CTR6, K
    vcipher CTR7, CTR7, K
.endm

.macro ROUND_4
    VEC_LOAD_INC K, KS, 10
    vcipher CTR0, CTR0, K
    vcipher CTR1, CTR1, K
    vcipher CTR2, CTR2, K
    vcipher CTR3, CTR3, K
.endm

.macro ROUND_2
    VEC_LOAD_INC K, KS, 10
    vcipher CTR0, CTR0, K
    vcipher CTR1, CTR1, K
.endm

.macro ROUND_1
    VEC_LOAD_INC K, KS, 10
    vcipher CTR0, CTR0, K
.endm

    # store non-volatile general registers in the red zone
    std 31,-8(SP);
    std 30,-16(SP);
    std 29,-24(SP);
    std 28,-32(SP);
    std 27,-40(SP);
    std 26,-48(SP);
    std 25,-56(SP);

    # store non-volatile vector registers
    addi 9, SP, -80
    stvx 31, 0, 9
    addi 9, SP, -96
    stvx 30, 0, 9
    addi 9, SP, -112
    stvx 29, 0, 9
    addi 9, SP, -128
    stvx 28, 0, 9
    addi 9, SP, -144
    stvx 27, 0, 9
    addi 9, SP, -160
    stvx 26, 0, 9
    addi 9, SP, -176
    stvx 25, 0, 9
    addi 9, SP, -192
    stvx 24, 0, 9

    VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9

    # build counter increment constants 1..8 in the low byte of the
    # high doubleword (vsldoi shifts the splatted byte into position)
    vxor ZERO, ZERO, ZERO
    vspltisb I1, 1
    vspltisb I2, 2
    vspltisb I3, 3
    vspltisb I4, 4
    vspltisb I5, 5
    vspltisb I6, 6
    vspltisb I7, 7
    vspltisb I8, 8
    vsldoi I1, ZERO, I1, 1
    vsldoi I2, ZERO, I2, 1
    vsldoi I3, ZERO, I3, 1
    vsldoi I4, ZERO, I4, 1
    vsldoi I5, ZERO, I5, 1
    vsldoi I6, ZERO, I6, 1
    vsldoi I7, ZERO, I7, 1
    vsldoi I8, ZERO, I8, 1

    VEC_LOAD CTR, CTRP, 0

    # --- process 8 blocks per iteration ---

    srdi. 9, LEN, 7
    beq .Lctr_4x

    mtctr 9

    # input/output offsets for blocks 1..7
    li 25, 0x10
    li 26, 0x20
    li 27, 0x30
    li 28, 0x40
    li 29, 0x50
    li 30, 0x60
    li 31, 0x70

.align 5
.L8x_loop:
    # whitening round: xor counters CTR..CTR+7 with the first round key
    li 10, 0
    VEC_LOAD_INC K, KS, 10

    vadduwm CTR1, CTR, I1
    vadduwm CTR2, CTR, I2
    vadduwm CTR3, CTR, I3
    vadduwm CTR4, CTR, I4
    vadduwm CTR5, CTR, I5
    vadduwm CTR6, CTR, I6
    vadduwm CTR7, CTR, I7

    vxor CTR0, CTR, K
    vxor CTR1, CTR1, K
    vxor CTR2, CTR2, K
    vxor CTR3, CTR3, K
    vxor CTR4, CTR4, K
    vxor CTR5, CTR5, K
    vxor CTR6, CTR6, K
    vxor CTR7, CTR7, K

    # 9 rounds always, plus 2 for AES-192, plus 2 more for AES-256
    ROUND_8
    ROUND_8
    ROUND_8
    ROUND_8
    ROUND_8
    ROUND_8
    ROUND_8
    ROUND_8
    ROUND_8
    cmpwi NR, 10
    beq .Llast_8
    ROUND_8
    ROUND_8
    cmpwi NR, 12
    beq .Llast_8
    ROUND_8
    ROUND_8

.Llast_8:
    VEC_LOAD K, KS, 10
    vcipherlast CTR0, CTR0, K
    vcipherlast CTR1, CTR1, K
    vcipherlast CTR2, CTR2, K
    vcipherlast CTR3, CTR3, K
    vcipherlast CTR4, CTR4, K
    vcipherlast CTR5, CTR5, K
    vcipherlast CTR6, CTR6, K
    vcipherlast CTR7, CTR7, K

    lxvd2x IN0+32, 0, PT
    lxvd2x IN1+32, 25, PT
    lxvd2x IN2+32, 26, PT
    lxvd2x IN3+32, 27, PT
    lxvd2x IN4+32, 28, PT
    lxvd2x IN5+32, 29, PT
    lxvd2x IN6+32, 30, PT
    lxvd2x IN7+32, 31, PT

    # swap keystream back to memory order, xor with plaintext
    vperm CTR0, CTR0, CTR0, SWAP_MASK
    vperm CTR1, CTR1, CTR1, SWAP_MASK
    vperm CTR2, CTR2, CTR2, SWAP_MASK
    vperm CTR3, CTR3, CTR3, SWAP_MASK
    vperm CTR4, CTR4, CTR4, SWAP_MASK
    vperm CTR5, CTR5, CTR5, SWAP_MASK
    vperm CTR6, CTR6, CTR6, SWAP_MASK
    vperm CTR7, CTR7, CTR7, SWAP_MASK

    vxor IN0, IN0, CTR0
    vxor IN1, IN1, CTR1
    vxor IN2, IN2, CTR2
    vxor IN3, IN3, CTR3
    vxor IN4, IN4, CTR4
    vxor IN5, IN5, CTR5
    vxor IN6, IN6, CTR6
    vxor IN7, IN7, CTR7

    stxvd2x IN0+32, 0, CT
    stxvd2x IN1+32, 25, CT
    stxvd2x IN2+32, 26, CT
    stxvd2x IN3+32, 27, CT
    stxvd2x IN4+32, 28, CT
    stxvd2x IN5+32, 29, CT
    stxvd2x IN6+32, 30, CT
    stxvd2x IN7+32, 31, CT

    vadduwm CTR, CTR, I8
    addi PT, PT, 0x80
    addi CT, CT, 0x80
    bdnz .L8x_loop

    clrldi LEN, LEN, 57                 # LEN &= 127

.Lctr_4x:
    # --- process 4 blocks ---

    srdi. 9, LEN, 6
    beq .Lctr_2x

    li 10, 0
    li 29, 0x10
    li 30, 0x20
    li 31, 0x30

    VEC_LOAD_INC K, KS, 10

    vadduwm CTR1, CTR, I1
    vadduwm CTR2, CTR, I2
    vadduwm CTR3, CTR, I3

    vxor CTR0, CTR, K
    vxor CTR1, CTR1, K
    vxor CTR2, CTR2, K
    vxor CTR3, CTR3, K

    ROUND_4
    ROUND_4
    ROUND_4
    ROUND_4
    ROUND_4
    ROUND_4
    ROUND_4
    ROUND_4
    ROUND_4
    cmpwi NR, 10
    beq .Llast_4
    ROUND_4
    ROUND_4
    cmpwi NR, 12
    beq .Llast_4
    ROUND_4
    ROUND_4

.Llast_4:
    VEC_LOAD K, KS, 10
    vcipherlast CTR0, CTR0, K
    vcipherlast CTR1, CTR1, K
    vcipherlast CTR2, CTR2, K
    vcipherlast CTR3, CTR3, K

    lxvd2x IN0+32, 0, PT
    lxvd2x IN1+32, 29, PT
    lxvd2x IN2+32, 30, PT
    lxvd2x IN3+32, 31, PT

    vperm CTR0, CTR0, CTR0, SWAP_MASK
    vperm CTR1, CTR1, CTR1, SWAP_MASK
    vperm CTR2, CTR2, CTR2, SWAP_MASK
    vperm CTR3, CTR3, CTR3, SWAP_MASK

    vxor IN0, IN0, CTR0
    vxor IN1, IN1, CTR1
    vxor IN2, IN2, CTR2
    vxor IN3, IN3, CTR3

    stxvd2x IN0+32, 0, CT
    stxvd2x IN1+32, 29, CT
    stxvd2x IN2+32, 30, CT
    stxvd2x IN3+32, 31, CT

    vadduwm CTR, CTR, I4
    addi PT, PT, 0x40
    addi CT, CT, 0x40

    clrldi LEN, LEN, 58                 # LEN &= 63

.Lctr_2x:
    # --- process 2 blocks ---

    srdi. 9, LEN, 5
    beq .Lctr_1x

    li 10, 0
    li 31, 0x10

    VEC_LOAD_INC K, KS, 10

    vadduwm CTR1, CTR, I1

    vxor CTR0, CTR, K
    vxor CTR1, CTR1, K

    ROUND_2
    ROUND_2
    ROUND_2
    ROUND_2
    ROUND_2
    ROUND_2
    ROUND_2
    ROUND_2
    ROUND_2
    cmpwi NR, 10
    beq .Llast_2
    ROUND_2
    ROUND_2
    cmpwi NR, 12
    beq .Llast_2
    ROUND_2
    ROUND_2

.Llast_2:
    VEC_LOAD K, KS, 10
    vcipherlast CTR0, CTR0, K
    vcipherlast CTR1, CTR1, K

    lxvd2x IN0+32, 0, PT
    lxvd2x IN1+32, 31, PT

    vperm CTR0, CTR0, CTR0, SWAP_MASK
    vperm CTR1, CTR1, CTR1, SWAP_MASK

    vxor IN0, IN0, CTR0
    vxor IN1, IN1, CTR1

    stxvd2x IN0+32, 0, CT
    stxvd2x IN1+32, 31, CT

    vadduwm CTR, CTR, I2
    addi PT, PT, 0x20
    addi CT, CT, 0x20

    clrldi LEN, LEN, 59                 # LEN &= 31

.Lctr_1x:
    # --- process 1 block ---

    srdi. 9, LEN, 4
    beq .Lctr_tail

    li 10, 0

    VEC_LOAD_INC K, KS, 10
    vxor CTR0, CTR, K

    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    cmpwi NR, 10
    beq .Llast_1
    ROUND_1
    ROUND_1
    cmpwi NR, 12
    beq .Llast_1
    ROUND_1
    ROUND_1

.Llast_1:
    VEC_LOAD K, KS, 10
    vcipherlast CTR0, CTR0, K

    lxvd2x IN0+32, 0, PT

    vperm CTR0, CTR0, CTR0, SWAP_MASK

    vxor IN0, IN0, CTR0

    stxvd2x IN0+32, 0, CT

    vadduwm CTR, CTR, I1
    addi PT, PT, 0x10
    addi CT, CT, 0x10

    clrldi LEN, LEN, 60                 # LEN &= 15

.Lctr_tail:
    # --- process the final partial block ---

    cmpldi LEN, 0
    beq .Lc_done

    li 10, 0

    VEC_LOAD_INC K, KS, 10
    vxor CTR0, CTR, K

    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    ROUND_1
    cmpwi NR, 10
    beq .Llast_tail
    ROUND_1
    ROUND_1
    cmpwi NR, 12
    beq .Llast_tail
    ROUND_1
    ROUND_1

.Llast_tail:
    VEC_LOAD K, KS, 10
    vcipherlast CTR0, CTR0, K

    # zero-padded load of the last LEN plaintext bytes into r10:r9
    LOAD_LEN PT, LEN, 10, 9, 29, 30, 31

    # move the keystream block to GPRs and xor in the plaintext
    vsldoi CTR1, CTR0, CTR0, 8
    mfvrd 31, CTR0
    mfvrd 30, CTR1

    xor 10, 10, 31
    xor 9, 9, 30

    STORE_LEN CT, LEN, 10, 9, 29, 30, 31

    vadduwm CTR, CTR, I1

.Lc_done:
    # write the updated counter back
    VEC_STORE CTR, CTRP, 0

    # restore non-volatile vector registers
    addi 9, SP, -80
    lvx 31, 0, 9
    addi 9, SP, -96
    lvx 30, 0, 9
    addi 9, SP, -112
    lvx 29, 0, 9
    addi 9, SP, -128
    lvx 28, 0, 9
    addi 9, SP, -144
    lvx 27, 0, 9
    addi 9, SP, -160
    lvx 26, 0, 9
    addi 9, SP, -176
    lvx 25, 0, 9
    addi 9, SP, -192
    lvx 24, 0, 9

    # restore non-volatile general registers
    ld 31,-8(SP);
    ld 30,-16(SP);
    ld 29,-24(SP);
    ld 28,-32(SP);
    ld 27,-40(SP);
    ld 26,-48(SP);
    ld 25,-56(SP);
    blr
.size ppc_aes_gcmCRYPT, . - ppc_aes_gcmCRYPT

.data
.align 4
# GCM reduction constant (0xc2 in the last byte)
.Lpoly:
    .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
# reduction constant positioned for the vpmsumd-based reduction
.Lpoly_r:
    .byte 0,0,0,0,0,0,0,0xc2,0,0,0,0,0,0,0,0
# vperm mask that swaps the two doublewords of a vector
.Ldb_bswap_mask:
    .byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7