# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain copyright notices,
#   this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
#   copyright notice, this list of conditions and the following
#   disclaimer in the documentation and/or other materials
#   provided with the distribution.
#
# * Neither the name of the Andy Polyakov nor the names of its
#   copyright holder and contributors may be used to endorse or
#   promote products derived from this software without specific
#   prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *** This file is auto-generated ***
#
# 1 "lib/accelerated/aarch64/elf/aes-aarch64.s.tmp.S"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "lib/accelerated/aarch64/elf/aes-aarch64.s.tmp.S"
# 1 "lib/accelerated/aarch64/aarch64-common.h" 1
# 2 "lib/accelerated/aarch64/elf/aes-aarch64.s.tmp.S" 2


.text
.arch armv8-a+crypto
.align 5
// Key-expansion constants, read with ld1 relative to .Lrcon:
//   +0  : initial round constant 0x01 in every 32-bit lane
//   +16 : tbl index vector (bytes 0d 0e 0f 0c per lane) that rotates the
//         last word by one byte and replicates it into all four lanes
//   +32 : round constant 0x1b, reloaded for the last two AES-128 rounds
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.long 0x1b,0x1b,0x1b,0x1b

//----------------------------------------------------------------------
// aes_v8_set_encrypt_key -- expand a raw AES key into the encryption
// round-key schedule using the ARMv8 Crypto Extensions.
//
// In:    x0 = pointer to the raw key bytes (16, 24 or 32 are read)
//        w1 = key length in bits; only 128, 192 and 256 are accepted
//        x2 = pointer to the output schedule. Round keys are stored
//             from offset 0 and the 32-bit round count (10/12/14) at
//             offset 240, so at least 244 bytes are written to.
// Out:   x0 = 0 on success, -1 if x0 or x2 is NULL, -2 on a bad length
//        On success x2 = schedule base + 240 and w12 = round count;
//        aes_v8_set_decrypt_key enters at .Lenc_key and uses both.
// Clobb: x1, x2, x3, x12, v0-v6, flags
// Stack: one 16-byte frame. x30 is never modified in the body, so only
//        x29 is reloaded before ret.
//----------------------------------------------------------------------
.globl aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
aes_v8_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// status for a NULL pointer
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// status for a rejected bit length
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192			// flags feed the three-way branch below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0: zero key for aese, zero fill for ext
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8			// w1 is reused as the loop counter
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate/splat mask; x3 -> 0x1b row

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

// AES-128: eight looped rounds, then rounds 9 and 10 unrolled below.
.align 4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// last word rotated by a byte, in every lane
	ext	v5.16b,v0.16b,v3.16b,#12	// v3 moved up one word, zero-filled
	st1	{v3.4s},[x2],#16	// emit the current round key
	aese	v6.16b,v0.16b		// zero key: SubBytes of the splatted word
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b	// running XOR of the four words ...
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// add the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// double the round constant for the next round
	eor	v3.16b,v3.16b,v6.16b	// ... completes the next round key
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// byte-wise doubling has left 0x80; reload 0x1b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// 0x1b -> 0x36 for the final round
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, at offset 160
	add	x2,x2,#0x50		// 160 + 0x50 = 240: slot for the round count

	mov	w12,#10
	b	.Ldone

// AES-192: v3 holds four key words, the low half of v4 the other two.
.align 4
.L192:
	ld1	{v4.8b},[x0],#8		// remaining 8 key bytes (upper half zeroed)
	movi	v6.16b,#8
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// shift mask indices by 8: last word is v4 bytes 4..7

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// 208 bytes written; 208 + 0x20 = 240
	b	.Ldone

// AES-256: v3 and v4 hold the two halves of the key and are updated in turn.
.align 4
.L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1		// flags are consumed by b.eq below

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// 240 bytes written: x2 already at offset 240

	dup	v6.4s,v3.s[3]		// last new word, unrotated, in every lane
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b		// SubBytes only; no round constant on this half

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at schedule offset 240
	mov	x3,#0			// success

.Lenc_key_abort:
	mov	x0,x3			// return the status
	ldr	x29,[sp],#16		// x30 was not modified, so only x29 is reloaded
	ret
.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

//----------------------------------------------------------------------
// aes_v8_set_decrypt_key -- build the decryption round-key schedule.
//
// Runs the encryption key expansion by calling .Lenc_key, then reverses
// the order of the round keys in place and applies aesimc to every
// round key except the first and the last.
//
// In:    x0, w1, x2 are passed unchanged to .Lenc_key (raw key pointer,
//        key length in bits, schedule pointer)
// Out:   x0 = 0 on success, otherwise the non-zero status from .Lenc_key
// Clobb: whatever .Lenc_key clobbers, plus x4, v0, v1, flags
// Note:  the two .inst words are the hint-space encodings of paciasp and
//        autiasp, bracketing the frame that holds the return address.
//----------------------------------------------------------------------
.globl aes_v8_set_decrypt_key
.type aes_v8_set_decrypt_key,%function
.align 5
aes_v8_set_decrypt_key:
.inst 0xd503233f			// paciasp
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// .Lenc_key left x2 at schedule + 240
	mov	x4,#-16			// post-index step for the descending pointer
	add	x0,x2,x12,lsl#4		// x12 = round count: x0 -> last round key

	ld1	{v0.4s},[x2]		// swap first and last round keys as they are
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:				// swap the inner pairs, applying aesimc to each
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	ld1	{v0.4s},[x2]		// pointers have met: middle round key
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return 0
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
.inst 0xd50323bf			// autiasp
	ret
.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
//----------------------------------------------------------------------
// aes_v8_encrypt -- encrypt one 16-byte block.
//
// In:    x0 = pointer to the 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule; the round count is read from
//             offset 240
// Out:   16 bytes stored at [x1]
// Clobb: x2, w3, v0-v2, flags. Leaf routine: no stack use.
//----------------------------------------------------------------------
.globl aes_v8_encrypt
.type aes_v8_encrypt,%function
.align 5
aes_v8_encrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// input block
	sub	w3,w3,#2		// the last two rounds run after the loop
	ld1	{v1.4s},[x2],#16	// round key 1

.Loop_enc:				// two rounds per iteration
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round: no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size aes_v8_encrypt,.-aes_v8_encrypt
//----------------------------------------------------------------------
// aes_v8_decrypt -- decrypt one 16-byte block.
//
// Same register interface and structure as aes_v8_encrypt, using
// aesd/aesimc. NOTE(review): x2 is presumably a schedule prepared by
// aes_v8_set_decrypt_key -- confirm against callers.
//
// Clobb: x2, w3, v0-v2, flags. Leaf routine: no stack use.
//----------------------------------------------------------------------
.globl aes_v8_decrypt
.type aes_v8_decrypt,%function
.align 5
aes_v8_decrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]		// input block
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:				// two rounds per iteration
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round: no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size aes_v8_decrypt,.-aes_v8_decrypt
//----------------------------------------------------------------------
// aes_v8_cbc_encrypt -- CBC-mode encryption or decryption.
//
// In:    x0 = input pointer
//        x1 = output pointer
//        x2 = length in bytes. If below 16 the routine returns without
//             touching anything; otherwise floor(x2 / 16) blocks are
//             processed.
//        x3 = key schedule (round count read from offset 240)
//        x4 = pointer to the 16-byte IV: read on entry, overwritten on
//             exit with the last ciphertext block
//        w5 = 0 to decrypt, non-zero to encrypt
// Clobb: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags
// Stack: one 16-byte frame. x30 is never modified, so only x29 is
//        reloaded before ret.
// Notes: x8 is the input post-index step and is forced to 0 when the
//        block just loaded is the last one, so the look-ahead load
//        re-reads that block instead of running past the input.
//        The decrypt path handles three blocks per iteration with a
//        one- or two-block tail.
//----------------------------------------------------------------------
.globl aes_v8_cbc_encrypt
.type aes_v8_cbc_encrypt,%function
.align 5
aes_v8_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lcbc_abort		// fewer than 16 bytes: nothing to do
	csel	x8,xzr,x8,eq		// exactly one block: do not advance x0

	cmp	w5,#0			// encrypt or decrypt? consumed by b.eq .Lcbc_dec
	ldr	w5,[x3,#240]		// w5 is reused for the round count
	and	x2,x2,#-16		// round the remaining length down to whole blocks
	ld1	{v6.16b},[x4]		// IV
	ld1	{v0.16b},[x0],x8	// first input block

	ld1	{v16.4s,v17.4s},[x3]	// round keys 0 and 1
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4		// -> last seven round keys
	sub	w5,w5,#2		// w5 = rounds - 8
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// last round key

	add	x7,x3,#32		// -> round key 2
	mov	w6,w5
	b.eq	.Lcbc_dec

	cmp	w5,#2			// rounds - 8 == 2 means a 10-round schedule
	eor	v0.16b,v0.16b,v6.16b	// first plaintext ^ IV
	eor	v5.16b,v16.16b,v7.16b	// rndkey[0] ^ rndkey[last], used for chaining
	b.eq	.Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]	// round keys 2 and 3 stay resident
	add	x7,x3,#16		// -> round key 1
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	.Lenter_cbc_enc

.align 4
.Loop_cbc_enc:
	// v0 holds the previous state before its final key XOR and v16 the
	// next plaintext ^ rndkey[0] ^ rndkey[last], so this aese also
	// performs the CBC chaining XOR.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]		// round key 4
	cmp	w5,#4			// rounds - 8 == 4 means a 12-round schedule
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]		// round key 5
	b.eq	.Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]		// round key 6
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]		// round key 7
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16		// flags consumed by csel and by b.hs below
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// last block coming up: stop advancing x0
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// ^ rndkey[0] ^ rndkey[last]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// reload round key 1
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

.align 5
.Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]	// round keys 2 and 3
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b		// chaining XOR folded in, as in .Loop_cbc_enc
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done
.align 5
.Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias the remaining length
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b	// orr vN,vM,vM is a vector register copy
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	.Lcbc_dec_tail		// fewer than three blocks in total

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

	// Three blocks in flight (v0, v1, v18). v2, v3 and v19 keep the
	// ciphertext copies needed for the chaining XOR; v6 is the IV or
	// the last ciphertext of the previous group.
.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// chaining value ^ last round key
	subs	x2,x2,#0x30		// flags consumed by csel and by b.hs below
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo		// w6 is zero here after the round countdown
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// on the final pass, step x0 back so the three
					// loads below stay inside the input


	orr	v6.16b,v19.16b,v19.16b	// next chaining value = last ciphertext
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// reload round key 0
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// reload round key 1
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	.Loop3x_cbc_dec

	cmn	x2,#0x30		// x2 == -0x30: no blocks left over
	b.eq	.Lcbc_done
	nop

.Lcbc_dec_tail:				// one or two remaining blocks, in v1 and v18
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// x2 == -0x20: exactly one block left
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b	// IV out = last ciphertext block
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b	// IV out = last ciphertext block
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the updated IV back
.Lcbc_abort:
	ldr	x29,[sp],#16		// x30 was not modified, so only x29 is reloaded
	ret
.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
//----------------------------------------------------------------------
// aes_v8_ctr32_encrypt_blocks -- CTR mode with a 32-bit counter.
//
// In:    x0 = input pointer
//        x1 = output pointer
//        x2 = number of 16-byte blocks
//        x3 = key schedule (round count read from offset 240)
//        x4 = pointer to the 16-byte counter block. Its last 32-bit
//             word is byte-reversed with rev, incremented once per
//             block with 32-bit wraparound, and reversed back. The
//             counter block in memory is not written back.
// Clobb: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags
// Stack: one 16-byte frame. x30 is never modified, so only x29 is
//        reloaded before ret.
// Notes: three blocks per iteration with a one- or two-block tail.
//        NOTE(review): a block count of 0 is not special-cased. It
//        takes the tail path and stores two blocks, so callers are
//        presumably required to pass x2 >= 1 -- confirm.
//----------------------------------------------------------------------
.globl aes_v8_ctr32_encrypt_blocks
.type aes_v8_ctr32_encrypt_blocks,%function
.align 5
aes_v8_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// round count

	ldr	w8, [x4, #12]		// counter word as stored
	ld1	{v0.4s},[x4]		// whole counter block

	ld1	{v16.4s,v17.4s},[x3]	// round keys 0 and 1
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4		// -> last five round keys
	sub	w5,w5,#2		// w5 = rounds - 6
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// last round key
	add	x7,x3,#32		// -> round key 2
	mov	w6,w5
	csel	x12,xzr,x12,lo		// fewer than two blocks: tail must not advance x0

	rev	w8, w8			// counter in native byte order

	orr	v1.16b,v0.16b,v0.16b	// orr vN,vM,vM is a vector register copy
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b	// v6 = template counter block
	rev	w10, w10
	mov	v1.s[3],w10		// v1 = counter + 1
	b.ls	.Lctr32_tail		// two blocks or fewer
	rev	w12, w8
	sub	x2,x2,#3		// bias the block count
	mov	v18.s[3],w12		// v18 = counter + 2
	b	.Loop3x_ctr32

.align 4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Finish the last rounds in v4, v5 and v17 while v0, v1 and v18
	// are rebuilt from the template with the next three counters.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// input ^ last round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3		// flags consumed by b.hs below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// reload round key 0
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// reload round key 1
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias: blocks still to do (0..2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: do not advance x0

.Lctr32_tail:				// one or two blocks, in v0 and v1
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 = 0 makes the next load re-read this block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// only one block was requested
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// x30 was not modified, so only x29 is reloaded
	ret
.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
.section .note.GNU-stack,"",%progbits
