/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
 * Copyright (c) 2019-2022 Samuel Neves
 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
 *
 * This is converted assembly: SSE4.1 -> ARMv8-A
 * Used tools: SIMDe https://github.com/simd-everywhere/simde
 *
 * Should work on FreeBSD, Linux and macOS
 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
 */

#if defined(__aarch64__)
	.text
	/*
	 * GNU property note: name size 4, descriptor size 16, type 5, owner
	 * "GNU", then one property record (type 0xc0000000, data size 4,
	 * value 3, padding 0).
	 * NOTE(review): type 0xc0000000 / value 3 looks like the AArch64
	 * FEATURE_1_AND property with the BTI and PAC bits set, which would
	 * match the hint instructions used below - confirm against the ELF ABI.
	 */
	.section	.note.gnu.property,"a",@note
	.p2align	3
	.word	4
	.word	16
	.word	5
	.asciz	"GNU"
	.word	3221225472
	.word	4
	.word	3
	.word	0
.Lsec_end0:
	.text

/*
 * zfs_blake3_compress_in_place_sse41
 *
 * In:   x0 = pointer to a 32-byte buffer; read as state rows 0-1 and
 *            overwritten with the 32-byte result
 *       x1 = pointer to 64 bytes of message data
 *       w2 = value whose low 8 bits enter the state (see compress_pre)
 *       x3 = 64-bit value entering the state (see compress_pre)
 *       w4 = value whose low 8 bits enter the state (see compress_pre)
 *       NOTE(review): presumably (cv, block, block_len, counter, flags) as
 *       in the BLAKE3 C API - confirm against the C prototype.
 * Out:  [x0 .. x0+31] = (row0 ^ row2, row1 ^ row3) of the state that
 *       compress_pre leaves in the stack buffer
 * Stack: 96 bytes: [sp, #0..63] state buffer, [sp, #64] x29/x30,
 *        [sp, #80] x19
 */
	.globl	zfs_blake3_compress_in_place_sse41
	.p2align	2
	.type	zfs_blake3_compress_in_place_sse41,@function
zfs_blake3_compress_in_place_sse41:
	.cfi_startproc
	hint	#25				/* paciasp */
	.cfi_negate_ra_state
	sub	sp, sp, #96
	stp	x29, x30, [sp, #64]
	add	x29, sp, #64
	str	x19, [sp, #80]
	.cfi_def_cfa w29, 32
	.cfi_offset w19, -16
	.cfi_offset w30, -24
	.cfi_offset w29, -32
	mov	x19, x0				/* keep the in/out pointer across the call */
	/* Shift the arguments up by one slot so x0 can carry the stack buffer. */
	mov	w5, w4
	mov	x4, x3
	mov	w3, w2
	mov	x2, x1
	mov	x0, sp
	mov	x1, x19
	bl	compress_pre
	ldp	q0, q1, [sp]			/* rows 0, 1 */
	ldp	q2, q3, [sp, #32]		/* rows 2, 3 */
	eor	v0.16b, v2.16b, v0.16b
	eor	v1.16b, v3.16b, v1.16b
	ldp	x29, x30, [sp, #64]
	stp	q0, q1, [x19]
	ldr	x19, [sp, #80]
	add	sp, sp, #96
	hint	#29				/* autiasp */
	ret
.Lfunc_end0:
	.size	zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
	.cfi_endproc

/*
 * Constants for compress_pre.
 *   .LCPI1_0  the 32-bit words 0x6A09E667, 0xBB67AE85, 0x3C6EF372,
 *             0xA54FF53A (stored as two 64-bit values); loaded as state
 *             row 2
 *   .LCPI1_1  tbl index vector: rotates every 32-bit lane by 16 bits
 *   .LCPI1_2  tbl index vector: rotates every 32-bit lane right by 8 bits
 */
	.section	.rodata.cst16,"aM",@progbits,16
	.p2align	4
.LCPI1_0:
	.xword	-4942790177982912921
	.xword	-6534734903820487822
.LCPI1_1:
	.byte	2
	.byte	3
	.byte	0
	.byte	1
	.byte	6
	.byte	7
	.byte	4
	.byte	5
	.byte	10
	.byte	11
	.byte	8
	.byte	9
	.byte	14
	.byte	15
	.byte	12
	.byte	13
.LCPI1_2:
	.byte	1
	.byte	2
	.byte	3
	.byte	0
	.byte	5
	.byte	6
	.byte	7
	.byte	4
	.byte	9
	.byte	10
	.byte	11
	.byte	8
	.byte	13
	.byte	14
	.byte	15
	.byte	12
	.text

/*
 * compress_pre - file-local helper (no .globl) called by both compress
 * entry points in this file.
 *
 * In:   x0 = pointer to a 64-byte output buffer (four 16-byte state rows)
 *       x1 = pointer to 32 bytes, loaded as state rows 0 and 1
 *       x2 = pointer to 64 bytes of message data
 *       w3 = only the low 8 bits are used; becomes word 2 of row 3
 *       x4 = 64-bit value; becomes words 0-1 of row 3
 *       w5 = only the low 8 bits are used; becomes word 3 of row 3
 * Out:  [x0 .. x0+63] = state rows 0-3 after the mixing sequence
 * Uses: x8, v0-v7, v16-v22 as scratch; no stack and no callee-saved
 *       registers are touched.
 *
 * Row 2 is initialised from .LCPI1_0.  The initial rows are also stored to
 * [x0] early on; those stores are overwritten by the final two stp.
 *
 * Rotations of the 32-bit lanes appear in four forms:
 *   tbl with v0 (.LCPI1_1)          rotate by 16
 *   ushr #12 / shl #20 / orr        rotate right by 12
 *   tbl with v1 (.LCPI1_2)          rotate right by 8
 *   ushr #7  / shl #25 / orr        rotate right by 7
 * The ext/uzp/zip/trn/rev64 instructions rotate the state rows between
 * steps and rearrange the message words.
 * NOTE(review): this presumably implements the BLAKE3 round function and
 * message schedule; the instruction order is compiler generated and must
 * not be changed by hand.
 */
	.p2align	2
	.type	compress_pre,@function
compress_pre:
	.cfi_startproc
	hint	#34				/* bti c */
	fmov	s1, w3
	movi	d0, #0x0000ff000000ff		/* 0xff mask for each 32-bit lane */
	ldr	q2, [x1]			/* row 0 */
	adrp	x8, .LCPI1_0
	mov	v1.s[1], w5
	str	q2, [x0]
	ldr	q4, [x8, :lo12:.LCPI1_0]	/* row 2 */
	ldr	q5, [x1, #16]			/* row 1 */
	adrp	x8, .LCPI1_1
	and	v0.8b, v1.8b, v0.8b		/* (w3 & 0xff, w5 & 0xff) */
	fmov	d1, x4
	stp	q5, q4, [x0, #16]
	mov	v1.d[1], v0.d[0]		/* row 3 = (x4 lo, x4 hi, w3 & 0xff, w5 & 0xff) */
	str	q1, [x0, #48]
	ldp	q6, q7, [x2]			/* first 32 message bytes */
	uzp1	v3.4s, v6.4s, v7.4s
	add	v0.4s, v2.4s, v3.4s
	uzp2	v2.4s, v6.4s, v7.4s
	add	v16.4s, v0.4s, v5.4s
	ldr	q0, [x8, :lo12:.LCPI1_1]	/* v0 = rotate-by-16 table from here on */
	adrp	x8, .LCPI1_2
	eor	v1.16b, v16.16b, v1.16b
	add	v7.4s, v16.4s, v2.4s
	tbl	v1.16b, { v1.16b }, v0.16b
	add	v4.4s, v1.4s, v4.4s
	eor	v5.16b, v4.16b, v5.16b
	ushr	v6.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v6.16b
	add	v6.4s, v7.4s, v5.4s
	eor	v7.16b, v1.16b, v6.16b
	ldr	q1, [x8, :lo12:.LCPI1_2]	/* v1 = rotate-right-by-8 table from here on */
	add	x8, x2, #32
	tbl	v7.16b, { v7.16b }, v1.16b
	ld2	{ v16.4s, v17.4s }, [x8]	/* last 32 message bytes, de-interleaved */
	add	v4.4s, v4.4s, v7.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	add	v6.4s, v6.4s, v16.4s
	eor	v5.16b, v4.16b, v5.16b
	ext	v4.16b, v4.16b, v4.16b, #4
	ext	v16.16b, v16.16b, v16.16b, #12
	ext	v6.16b, v6.16b, v6.16b, #12
	ushr	v18.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v18.16b
	ext	v18.16b, v17.16b, v17.16b, #12
	add	v6.4s, v6.4s, v5.4s
	mov	v17.16b, v18.16b
	eor	v7.16b, v7.16b, v6.16b
	add	v6.4s, v6.4s, v18.4s
	mov	v17.s[1], v16.s[2]
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v4.4s, v4.4s, v7.4s
	eor	v5.16b, v4.16b, v5.16b
	ushr	v19.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v19.16b
	uzp1	v19.4s, v3.4s, v3.4s
	add	v6.4s, v6.4s, v5.4s
	ext	v19.16b, v19.16b, v3.16b, #8
	eor	v7.16b, v7.16b, v6.16b
	uzp2	v19.4s, v19.4s, v2.4s
	tbl	v7.16b, { v7.16b }, v1.16b
	add	v6.4s, v6.4s, v19.4s
	add	v4.4s, v4.4s, v7.4s
	ext	v6.16b, v6.16b, v6.16b, #4
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v5.16b, v4.16b, v5.16b
	ext	v4.16b, v4.16b, v4.16b, #12
	ushr	v20.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v20.16b
	ext	v20.16b, v3.16b, v3.16b, #12
	add	v6.4s, v6.4s, v5.4s
	ext	v3.16b, v3.16b, v20.16b, #12
	eor	v7.16b, v7.16b, v6.16b
	rev64	v3.4s, v3.4s
	tbl	v7.16b, { v7.16b }, v0.16b
	trn2	v3.4s, v3.4s, v17.4s
	add	v4.4s, v4.4s, v7.4s
	add	v6.4s, v6.4s, v3.4s
	eor	v5.16b, v4.16b, v5.16b
	ushr	v17.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v17.16b
	zip1	v17.2d, v18.2d, v2.2d
	zip2	v2.4s, v2.4s, v18.4s
	add	v6.4s, v6.4s, v5.4s
	mov	v17.s[3], v16.s[3]
	zip1	v18.4s, v2.4s, v16.4s
	zip1	v2.4s, v16.4s, v2.4s
	eor	v7.16b, v7.16b, v6.16b
	ext	v6.16b, v6.16b, v6.16b, #12
	ext	v16.16b, v2.16b, v18.16b, #8
	tbl	v7.16b, { v7.16b }, v1.16b
	add	v20.4s, v4.4s, v7.4s
	ext	v4.16b, v17.16b, v17.16b, #12
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v5.16b, v20.16b, v5.16b
	uzp1	v4.4s, v17.4s, v4.4s
	ushr	v17.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	add	v6.4s, v6.4s, v4.4s
	orr	v5.16b, v5.16b, v17.16b
	ext	v17.16b, v20.16b, v20.16b, #4
	add	v6.4s, v6.4s, v5.4s
	eor	v7.16b, v7.16b, v6.16b
	add	v6.4s, v6.4s, v16.4s
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v17.4s, v17.4s, v7.4s
	eor	v5.16b, v17.16b, v5.16b
	ushr	v2.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v2.16b, v5.16b, v2.16b
	add	v5.4s, v6.4s, v2.4s
	ext	v6.16b, v19.16b, v19.16b, #4
	eor	v7.16b, v7.16b, v5.16b
	uzp1	v18.4s, v6.4s, v6.4s
	tbl	v7.16b, { v7.16b }, v1.16b
	ext	v18.16b, v18.16b, v6.16b, #8
	add	v17.4s, v17.4s, v7.4s
	uzp2	v18.4s, v18.4s, v3.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v2.16b, v17.16b, v2.16b
	add	v5.4s, v5.4s, v18.4s
	ext	v17.16b, v17.16b, v17.16b, #12
	ushr	v19.4s, v2.4s, #7
	shl	v2.4s, v2.4s, #25
	ext	v5.16b, v5.16b, v5.16b, #4
	orr	v2.16b, v2.16b, v19.16b
	ext	v19.16b, v6.16b, v6.16b, #12
	add	v5.4s, v5.4s, v2.4s
	ext	v6.16b, v6.16b, v19.16b, #12
	mov	v19.16b, v16.16b
	eor	v7.16b, v7.16b, v5.16b
	rev64	v6.4s, v6.4s
	mov	v19.s[1], v4.s[2]
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v17.4s, v17.4s, v7.4s
	eor	v20.16b, v17.16b, v2.16b
	trn2	v2.4s, v6.4s, v19.4s
	ushr	v6.4s, v20.4s, #12
	shl	v19.4s, v20.4s, #20
	add	v5.4s, v5.4s, v2.4s
	orr	v6.16b, v19.16b, v6.16b
	add	v19.4s, v5.4s, v6.4s
	eor	v5.16b, v7.16b, v19.16b
	zip1	v7.2d, v16.2d, v3.2d
	zip2	v3.4s, v3.4s, v16.4s
	tbl	v20.16b, { v5.16b }, v1.16b
	mov	v7.s[3], v4.s[3]
	add	v17.4s, v17.4s, v20.4s
	ext	v5.16b, v7.16b, v7.16b, #12
	eor	v6.16b, v17.16b, v6.16b
	uzp1	v5.4s, v7.4s, v5.4s
	ext	v7.16b, v19.16b, v19.16b, #12
	ext	v17.16b, v17.16b, v17.16b, #4
	ushr	v19.4s, v6.4s, #7
	shl	v6.4s, v6.4s, #25
	add	v7.4s, v7.4s, v5.4s
	orr	v6.16b, v6.16b, v19.16b
	ext	v19.16b, v20.16b, v20.16b, #8
	add	v7.4s, v7.4s, v6.4s
	eor	v19.16b, v19.16b, v7.16b
	tbl	v19.16b, { v19.16b }, v0.16b
	add	v16.4s, v17.4s, v19.4s
	zip1	v17.4s, v3.4s, v4.4s
	zip1	v3.4s, v4.4s, v3.4s
	eor	v4.16b, v16.16b, v6.16b
	ext	v17.16b, v3.16b, v17.16b, #8
	ushr	v3.4s, v4.4s, #12
	shl	v4.4s, v4.4s, #20
	add	v6.4s, v7.4s, v17.4s
	orr	v3.16b, v4.16b, v3.16b
	add	v4.4s, v6.4s, v3.4s
	ext	v6.16b, v18.16b, v18.16b, #4
	eor	v7.16b, v19.16b, v4.16b
	uzp1	v18.4s, v6.4s, v6.4s
	tbl	v7.16b, { v7.16b }, v1.16b
	ext	v18.16b, v18.16b, v6.16b, #8
	add	v16.4s, v16.4s, v7.4s
	uzp2	v18.4s, v18.4s, v2.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v3.16b, v16.16b, v3.16b
	add	v4.4s, v4.4s, v18.4s
	ext	v16.16b, v16.16b, v16.16b, #12
	ushr	v19.4s, v3.4s, #7
	shl	v3.4s, v3.4s, #25
	ext	v4.16b, v4.16b, v4.16b, #4
	orr	v3.16b, v3.16b, v19.16b
	ext	v19.16b, v6.16b, v6.16b, #12
	add	v4.4s, v4.4s, v3.4s
	ext	v6.16b, v6.16b, v19.16b, #12
	mov	v19.16b, v17.16b
	eor	v7.16b, v7.16b, v4.16b
	rev64	v6.4s, v6.4s
	mov	v19.s[1], v5.s[2]
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v16.4s, v16.4s, v7.4s
	eor	v20.16b, v16.16b, v3.16b
	trn2	v3.4s, v6.4s, v19.4s
	ushr	v6.4s, v20.4s, #12
	shl	v19.4s, v20.4s, #20
	add	v4.4s, v4.4s, v3.4s
	orr	v6.16b, v19.16b, v6.16b
	zip1	v19.2d, v17.2d, v2.2d
	zip2	v2.4s, v2.4s, v17.4s
	add	v4.4s, v4.4s, v6.4s
	mov	v19.s[3], v5.s[3]
	zip1	v17.4s, v2.4s, v5.4s
	zip1	v2.4s, v5.4s, v2.4s
	eor	v7.16b, v7.16b, v4.16b
	ext	v20.16b, v19.16b, v19.16b, #12
	ext	v4.16b, v4.16b, v4.16b, #12
	ext	v2.16b, v2.16b, v17.16b, #8
	tbl	v7.16b, { v7.16b }, v1.16b
	add	v16.4s, v16.4s, v7.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v21.16b, v16.16b, v6.16b
	uzp1	v6.4s, v19.4s, v20.4s
	ext	v16.16b, v16.16b, v16.16b, #4
	ushr	v19.4s, v21.4s, #7
	shl	v20.4s, v21.4s, #25
	add	v4.4s, v4.4s, v6.4s
	orr	v19.16b, v20.16b, v19.16b
	add	v4.4s, v4.4s, v19.4s
	eor	v7.16b, v7.16b, v4.16b
	add	v4.4s, v4.4s, v2.4s
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v16.4s, v16.4s, v7.4s
	eor	v5.16b, v16.16b, v19.16b
	ushr	v17.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v17.16b
	ext	v17.16b, v18.16b, v18.16b, #4
	add	v4.4s, v4.4s, v5.4s
	uzp1	v18.4s, v17.4s, v17.4s
	eor	v7.16b, v7.16b, v4.16b
	ext	v18.16b, v18.16b, v17.16b, #8
	tbl	v7.16b, { v7.16b }, v1.16b
	uzp2	v18.4s, v18.4s, v3.4s
	add	v16.4s, v16.4s, v7.4s
	add	v4.4s, v4.4s, v18.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v5.16b, v16.16b, v5.16b
	ext	v4.16b, v4.16b, v4.16b, #4
	ext	v16.16b, v16.16b, v16.16b, #12
	ushr	v19.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v19.16b
	add	v19.4s, v4.4s, v5.4s
	eor	v4.16b, v7.16b, v19.16b
	ext	v7.16b, v17.16b, v17.16b, #12
	tbl	v20.16b, { v4.16b }, v0.16b
	ext	v4.16b, v17.16b, v7.16b, #12
	mov	v7.16b, v2.16b
	add	v16.4s, v16.4s, v20.4s
	rev64	v4.4s, v4.4s
	mov	v7.s[1], v6.s[2]
	eor	v5.16b, v16.16b, v5.16b
	trn2	v4.4s, v4.4s, v7.4s
	ushr	v7.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	add	v17.4s, v19.4s, v4.4s
	zip1	v19.2d, v2.2d, v3.2d
	zip2	v2.4s, v3.4s, v2.4s
	orr	v5.16b, v5.16b, v7.16b
	mov	v19.s[3], v6.s[3]
	add	v7.4s, v17.4s, v5.4s
	eor	v17.16b, v20.16b, v7.16b
	ext	v20.16b, v19.16b, v19.16b, #12
	ext	v7.16b, v7.16b, v7.16b, #12
	tbl	v17.16b, { v17.16b }, v1.16b
	add	v16.4s, v16.4s, v17.4s
	ext	v17.16b, v17.16b, v17.16b, #8
	eor	v21.16b, v16.16b, v5.16b
	uzp1	v5.4s, v19.4s, v20.4s
	ext	v16.16b, v16.16b, v16.16b, #4
	ushr	v19.4s, v21.4s, #7
	shl	v20.4s, v21.4s, #25
	add	v7.4s, v7.4s, v5.4s
	orr	v19.16b, v20.16b, v19.16b
	add	v7.4s, v7.4s, v19.4s
	eor	v17.16b, v17.16b, v7.16b
	tbl	v17.16b, { v17.16b }, v0.16b
	add	v3.4s, v16.4s, v17.4s
	zip1	v16.4s, v2.4s, v6.4s
	zip1	v2.4s, v6.4s, v2.4s
	eor	v6.16b, v3.16b, v19.16b
	ext	v16.16b, v2.16b, v16.16b, #8
	ushr	v2.4s, v6.4s, #12
	shl	v6.4s, v6.4s, #20
	add	v7.4s, v7.4s, v16.4s
	orr	v2.16b, v6.16b, v2.16b
	add	v6.4s, v7.4s, v2.4s
	ext	v7.16b, v18.16b, v18.16b, #4
	eor	v17.16b, v17.16b, v6.16b
	uzp1	v18.4s, v7.4s, v7.4s
	tbl	v17.16b, { v17.16b }, v1.16b
	ext	v18.16b, v18.16b, v7.16b, #8
	add	v3.4s, v3.4s, v17.4s
	uzp2	v18.4s, v18.4s, v4.4s
	eor	v2.16b, v3.16b, v2.16b
	add	v6.4s, v6.4s, v18.4s
	ext	v3.16b, v3.16b, v3.16b, #12
	ext	v18.16b, v18.16b, v18.16b, #4
	ushr	v19.4s, v2.4s, #7
	shl	v2.4s, v2.4s, #25
	ext	v6.16b, v6.16b, v6.16b, #4
	orr	v19.16b, v2.16b, v19.16b
	ext	v2.16b, v17.16b, v17.16b, #8
	ext	v17.16b, v7.16b, v7.16b, #12
	add	v6.4s, v6.4s, v19.4s
	eor	v2.16b, v2.16b, v6.16b
	tbl	v20.16b, { v2.16b }, v0.16b
	ext	v2.16b, v7.16b, v17.16b, #12
	mov	v7.16b, v16.16b
	add	v17.4s, v3.4s, v20.4s
	rev64	v3.4s, v2.4s
	mov	v7.s[1], v5.s[2]
	eor	v19.16b, v17.16b, v19.16b
	trn2	v3.4s, v3.4s, v7.4s
	ushr	v21.4s, v19.4s, #12
	shl	v19.4s, v19.4s, #20
	add	v6.4s, v6.4s, v3.4s
	orr	v19.16b, v19.16b, v21.16b
	add	v21.4s, v6.4s, v19.4s
	eor	v6.16b, v20.16b, v21.16b
	zip1	v20.2d, v16.2d, v4.2d
	zip2	v4.4s, v4.4s, v16.4s
	tbl	v22.16b, { v6.16b }, v1.16b
	mov	v20.s[3], v5.s[3]
	add	v17.4s, v17.4s, v22.4s
	ext	v6.16b, v20.16b, v20.16b, #12
	eor	v19.16b, v17.16b, v19.16b
	uzp1	v6.4s, v20.4s, v6.4s
	ext	v20.16b, v21.16b, v21.16b, #12
	ext	v17.16b, v17.16b, v17.16b, #4
	ushr	v21.4s, v19.4s, #7
	shl	v19.4s, v19.4s, #25
	add	v20.4s, v20.4s, v6.4s
	orr	v19.16b, v19.16b, v21.16b
	ext	v21.16b, v22.16b, v22.16b, #8
	add	v20.4s, v20.4s, v19.4s
	eor	v21.16b, v21.16b, v20.16b
	tbl	v21.16b, { v21.16b }, v0.16b
	add	v16.4s, v17.4s, v21.4s
	zip1	v17.4s, v4.4s, v5.4s
	zip1	v4.4s, v5.4s, v4.4s
	eor	v5.16b, v16.16b, v19.16b
	ext	v4.16b, v4.16b, v17.16b, #8
	ushr	v17.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	add	v19.4s, v20.4s, v4.4s
	ext	v20.16b, v18.16b, v18.16b, #8
	zip1	v3.2d, v4.2d, v3.2d
	orr	v5.16b, v5.16b, v17.16b
	zip2	v2.4s, v2.4s, v4.4s
	uzp2	v7.4s, v20.4s, v7.4s
	mov	v3.s[3], v6.s[3]
	add	v17.4s, v19.4s, v5.4s
	ext	v7.16b, v7.16b, v20.16b, #4
	eor	v19.16b, v21.16b, v17.16b
	ext	v17.16b, v17.16b, v17.16b, #4
	tbl	v19.16b, { v19.16b }, v1.16b
	add	v7.4s, v17.4s, v7.4s
	add	v16.4s, v16.4s, v19.4s
	ext	v17.16b, v19.16b, v19.16b, #8
	ext	v19.16b, v18.16b, v18.16b, #12
	eor	v5.16b, v16.16b, v5.16b
	ext	v16.16b, v16.16b, v16.16b, #12
	ext	v18.16b, v18.16b, v19.16b, #12
	mov	v19.16b, v4.16b
	ushr	v20.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	rev64	v18.4s, v18.4s
	mov	v19.s[1], v6.s[2]
	orr	v5.16b, v5.16b, v20.16b
	trn2	v18.4s, v18.4s, v19.4s
	add	v7.4s, v5.4s, v7.4s
	eor	v17.16b, v17.16b, v7.16b
	add	v7.4s, v7.4s, v18.4s
	ext	v18.16b, v3.16b, v3.16b, #12
	tbl	v17.16b, { v17.16b }, v0.16b
	uzp1	v3.4s, v3.4s, v18.4s
	add	v16.4s, v16.4s, v17.4s
	eor	v5.16b, v16.16b, v5.16b
	ushr	v19.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v19.16b
	add	v7.4s, v7.4s, v5.4s
	eor	v17.16b, v17.16b, v7.16b
	ext	v7.16b, v7.16b, v7.16b, #12
	tbl	v17.16b, { v17.16b }, v1.16b
	add	v3.4s, v7.4s, v3.4s
	add	v16.4s, v16.4s, v17.4s
	ext	v7.16b, v17.16b, v17.16b, #8
	eor	v5.16b, v16.16b, v5.16b
	ext	v16.16b, v16.16b, v16.16b, #4
	ushr	v18.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v18.16b
	add	v3.4s, v3.4s, v5.4s
	eor	v7.16b, v7.16b, v3.16b
	tbl	v0.16b, { v7.16b }, v0.16b
	zip1	v7.4s, v2.4s, v6.4s
	zip1	v2.4s, v6.4s, v2.4s
	add	v4.4s, v16.4s, v0.4s
	ext	v2.16b, v2.16b, v7.16b, #8
	eor	v5.16b, v4.16b, v5.16b
	add	v2.4s, v3.4s, v2.4s
	ushr	v6.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v3.16b, v5.16b, v6.16b
	add	v2.4s, v2.4s, v3.4s
	eor	v0.16b, v0.16b, v2.16b
	ext	v2.16b, v2.16b, v2.16b, #4
	tbl	v0.16b, { v0.16b }, v1.16b
	add	v1.4s, v4.4s, v0.4s
	ext	v0.16b, v0.16b, v0.16b, #8
	eor	v3.16b, v1.16b, v3.16b
	ext	v1.16b, v1.16b, v1.16b, #12
	ushr	v4.4s, v3.4s, #7
	shl	v3.4s, v3.4s, #25
	stp	q1, q0, [x0, #32]		/* rows 2, 3 */
	orr	v3.16b, v3.16b, v4.16b
	stp	q2, q3, [x0]			/* rows 0, 1 */
	ret
.Lfunc_end1:
	.size	compress_pre, .Lfunc_end1-compress_pre
	.cfi_endproc

/*
 * zfs_blake3_compress_xof_sse41
 *
 * In:   x0 = pointer to a 32-byte buffer; read only (state rows 0-1)
 *       x1 = pointer to 64 bytes of message data
 *       w2 = value whose low 8 bits enter the state (see compress_pre)
 *       x3 = 64-bit value entering the state (see compress_pre)
 *       w4 = value whose low 8 bits enter the state (see compress_pre)
 *       x5 = pointer to the 64-byte output buffer
 *       NOTE(review): presumably (cv, block, block_len, counter, flags,
 *       out) as in the BLAKE3 C API - confirm against the C prototype.
 * Out:  [x5 +  0..31] = (row0 ^ row2, row1 ^ row3)
 *       [x5 + 32..47] = row2 ^ the 16 bytes at [x0]
 *       [x5 + 48..63] = row3 ^ the 16 bytes at [x0, #16]
 *       where row0-row3 are what compress_pre leaves in the stack buffer
 * Stack: 96 bytes: [sp, #0..63] state buffer, [sp, #64] x29/x30,
 *        [sp, #80] x20/x19
 */
	.globl	zfs_blake3_compress_xof_sse41
	.p2align	2
	.type	zfs_blake3_compress_xof_sse41,@function
zfs_blake3_compress_xof_sse41:
	.cfi_startproc
	hint	#25				/* paciasp */
	.cfi_negate_ra_state
	sub	sp, sp, #96
	stp	x29, x30, [sp, #64]
	add	x29, sp, #64
	stp	x20, x19, [sp, #80]
	.cfi_def_cfa w29, 32
	.cfi_offset w19, -8
	.cfi_offset w20, -16
	.cfi_offset w30, -24
	.cfi_offset w29, -32
	mov	x20, x0				/* input pointer, re-read after the call */
	mov	x19, x5				/* output pointer */
	/* Shift the arguments up by one slot so x0 can carry the stack buffer. */
	mov	w5, w4
	mov	x4, x3
	mov	w3, w2
	mov	x2, x1
	mov	x0, sp
	mov	x1, x20
	bl	compress_pre
	ldp	q0, q1, [sp]			/* rows 0, 1 */
	ldp	q2, q3, [sp, #32]		/* rows 2, 3 */
	eor	v0.16b, v2.16b, v0.16b
	eor	v1.16b, v3.16b, v1.16b
	ldp	x29, x30, [sp, #64]
	stp	q0, q1, [x19]
	ldr	q0, [x20]
	eor	v0.16b, v0.16b, v2.16b
	str	q0, [x19, #32]
	ldr	q0, [x20, #16]
	eor	v0.16b, v0.16b, v3.16b
	str	q0, [x19, #48]
	ldp	x20, x19, [sp, #80]
	add	sp, sp, #96
	hint	#29				/* autiasp */
	ret
.Lfunc_end2:
	.size	zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
	.cfi_endproc

/*
 * Constants placed ahead of zfs_blake3_hash_many_sse41.
 *   .LCPI3_0  the words 0, 1, 2, 3
 *   .LCPI3_1  same byte indices as .LCPI1_1 (rotate each lane by 16)
 *   .LCPI3_2  same byte indices as .LCPI1_2 (rotate each lane right by 8)
 *   .LCPI3_3  the same four 32-bit words as .LCPI1_0, in decimal
 * The operand of the final .word directive (2773480762) is on the source
 * line that follows this block.
 */
	.section	.rodata.cst16,"aM",@progbits,16
	.p2align	4
.LCPI3_0:
	.word	0
	.word	1
	.word	2
	.word	3
.LCPI3_1:
	.byte	2
	.byte	3
	.byte	0
	.byte	1
	.byte	6
	.byte	7
	.byte	4
	.byte	5
	.byte	10
	.byte	11
	.byte	8
	.byte	9
	.byte	14
	.byte	15
	.byte	12
	.byte	13
.LCPI3_2:
	.byte	1
	.byte	2
	.byte	3
	.byte	0
	.byte	5
	.byte	6
	.byte	7
	.byte	4
	.byte	9
	.byte	10
	.byte	11
	.byte	8
	.byte	13
	.byte	14
	.byte	15
	.byte	12
.LCPI3_3:
	.word	1779033703
	.word	3144134277
	.word	1013904242
	.word
2773480762 653 .text 654 .globl zfs_blake3_hash_many_sse41 655 .p2align 2 656 .type zfs_blake3_hash_many_sse41,@function 657zfs_blake3_hash_many_sse41: 658 .cfi_startproc 659 hint #34 660 stp d15, d14, [sp, #-144]! 661 stp d13, d12, [sp, #16] 662 stp d11, d10, [sp, #32] 663 stp d9, d8, [sp, #48] 664 stp x29, x27, [sp, #64] 665 stp x26, x25, [sp, #80] 666 stp x24, x23, [sp, #96] 667 stp x22, x21, [sp, #112] 668 stp x20, x19, [sp, #128] 669 sub sp, sp, #368 670 .cfi_def_cfa_offset 512 671 .cfi_offset w19, -8 672 .cfi_offset w20, -16 673 .cfi_offset w21, -24 674 .cfi_offset w22, -32 675 .cfi_offset w23, -40 676 .cfi_offset w24, -48 677 .cfi_offset w25, -56 678 .cfi_offset w26, -64 679 .cfi_offset w27, -72 680 .cfi_offset w29, -80 681 .cfi_offset b8, -88 682 .cfi_offset b9, -96 683 .cfi_offset b10, -104 684 .cfi_offset b11, -112 685 .cfi_offset b12, -120 686 .cfi_offset b13, -128 687 .cfi_offset b14, -136 688 .cfi_offset b15, -144 689 ldr x8, [sp, #520] 690 adrp x11, .LCPI3_1 691 ldrb w9, [sp, #512] 692 adrp x10, .LCPI3_2 693 cmp x1, #4 694 b.lo .LBB3_6 695 adrp x12, .LCPI3_0 696 sbfx w13, w5, #0, #1 697 mov w15, #58983 698 mov w16, #44677 699 movk w15, #27145, lsl #16 700 movk w16, #47975, lsl #16 701 ldr q0, [x12, :lo12:.LCPI3_0] 702 dup v1.4s, w13 703 movi v13.4s, #64 704 mov w13, #62322 705 mov w14, #62778 706 orr w12, w7, w6 707 and v0.16b, v1.16b, v0.16b 708 ldr q1, [x11, :lo12:.LCPI3_1] 709 movk w13, #15470, lsl #16 710 movk w14, #42319, lsl #16 711 dup v14.4s, w15 712 stp q0, q1, [sp, #16] 713 orr v0.4s, #128, lsl #24 714 str q0, [sp] 715 dup v0.4s, w16 716 stp q0, q14, [sp, #48] 717 b .LBB3_3 718.LBB3_2: 719 zip1 v0.4s, v29.4s, v8.4s 720 add x15, x4, #4 721 zip1 v1.4s, v30.4s, v31.4s 722 tst w5, #0x1 723 zip1 v2.4s, v24.4s, v18.4s 724 csel x4, x15, x4, ne 725 zip1 v3.4s, v25.4s, v26.4s 726 add x0, x0, #32 727 zip2 v6.4s, v29.4s, v8.4s 728 sub x1, x1, #4 729 zip1 v4.2d, v0.2d, v1.2d 730 cmp x1, #3 731 zip2 v7.4s, v30.4s, v31.4s 732 zip1 v5.2d, v2.2d, v3.2d 733 
zip2 v0.2d, v0.2d, v1.2d 734 zip2 v1.2d, v2.2d, v3.2d 735 zip2 v2.4s, v24.4s, v18.4s 736 zip2 v3.4s, v25.4s, v26.4s 737 stp q4, q5, [x8] 738 zip2 v4.2d, v6.2d, v7.2d 739 stp q0, q1, [x8, #32] 740 zip1 v0.2d, v6.2d, v7.2d 741 zip1 v1.2d, v2.2d, v3.2d 742 zip2 v2.2d, v2.2d, v3.2d 743 stp q0, q1, [x8, #64] 744 stp q4, q2, [x8, #96] 745 add x8, x8, #128 746 b.ls .LBB3_6 747.LBB3_3: 748 mov x15, x3 749 add x16, x3, #8 750 add x17, x3, #12 751 add x19, x3, #16 752 add x20, x3, #20 753 ld1r { v29.4s }, [x15], #4 754 ld1r { v30.4s }, [x16] 755 add x16, x3, #24 756 ld1r { v31.4s }, [x17] 757 add x17, x3, #28 758 ld1r { v24.4s }, [x19] 759 ld1r { v18.4s }, [x20] 760 ld1r { v25.4s }, [x16] 761 ld1r { v8.4s }, [x15] 762 ld1r { v26.4s }, [x17] 763 cbz x2, .LBB3_2 764 ldr q1, [sp, #16] 765 dup v0.4s, w4 766 lsr x17, x4, #32 767 mov x15, xzr 768 ldp x19, x20, [x0, #16] 769 add v1.4s, v0.4s, v1.4s 770 mov x21, x2 771 movi v0.4s, #128, lsl #24 772 mov w26, w12 773 str q1, [sp, #96] 774 eor v0.16b, v1.16b, v0.16b 775 ldr q1, [sp] 776 cmgt v0.4s, v1.4s, v0.4s 777 dup v1.4s, w17 778 ldp x16, x17, [x0] 779 sub v0.4s, v1.4s, v0.4s 780 str q0, [sp, #80] 781.LBB3_5: 782 add x23, x16, x15 783 add x24, x17, x15 784 add x22, x19, x15 785 add x25, x20, x15 786 subs x21, x21, #1 787 add x15, x15, #64 788 ldp q1, q2, [x23] 789 csel w27, w9, wzr, eq 790 orr w26, w27, w26 791 and w26, w26, #0xff 792 ldp q4, q5, [x24] 793 dup v0.4s, w26 794 mov w26, w6 795 zip1 v22.4s, v1.4s, v4.4s 796 zip2 v20.4s, v1.4s, v4.4s 797 ldp q6, q7, [x22] 798 zip1 v17.4s, v2.4s, v5.4s 799 zip2 v23.4s, v2.4s, v5.4s 800 ldp q16, q21, [x25] 801 zip1 v19.4s, v6.4s, v16.4s 802 zip2 v1.4s, v6.4s, v16.4s 803 ldp q27, q28, [x23, #32] 804 zip1 v4.4s, v7.4s, v21.4s 805 zip2 v5.4s, v7.4s, v21.4s 806 zip2 v15.2d, v17.2d, v4.2d 807 ldp q9, q10, [x24, #32] 808 mov v17.d[1], v4.d[0] 809 add v4.4s, v30.4s, v25.4s 810 zip2 v11.2d, v23.2d, v5.2d 811 zip2 v3.4s, v27.4s, v9.4s 812 zip1 v7.4s, v27.4s, v9.4s 813 ldp q12, q6, [x22, #32] 814 
mov v23.d[1], v5.d[0] 815 stp q11, q3, [sp, #256] 816 add v5.4s, v31.4s, v26.4s 817 add v4.4s, v4.4s, v17.4s 818 str q23, [sp, #352] 819 ldp q16, q2, [x25, #32] 820 add v5.4s, v5.4s, v23.4s 821 zip1 v3.4s, v12.4s, v16.4s 822 eor v0.16b, v5.16b, v0.16b 823 zip1 v9.4s, v6.4s, v2.4s 824 zip2 v2.4s, v6.4s, v2.4s 825 stp q7, q3, [sp, #208] 826 zip2 v3.4s, v12.4s, v16.4s 827 zip1 v12.4s, v28.4s, v10.4s 828 zip2 v10.4s, v28.4s, v10.4s 829 stp q17, q2, [sp, #160] 830 zip2 v28.2d, v22.2d, v19.2d 831 mov v22.d[1], v19.d[0] 832 str q3, [sp, #240] 833 add v2.4s, v8.4s, v18.4s 834 eor v16.16b, v4.16b, v13.16b 835 dup v17.4s, w13 836 mov v3.16b, v22.16b 837 stp q22, q28, [sp, #320] 838 zip2 v22.2d, v20.2d, v1.2d 839 mov v20.d[1], v1.d[0] 840 add v1.4s, v29.4s, v24.4s 841 add v4.4s, v4.4s, v15.4s 842 add v5.4s, v5.4s, v11.4s 843 add v2.4s, v2.4s, v20.4s 844 stp q15, q20, [sp, #288] 845 add v1.4s, v1.4s, v3.4s 846 ldr q3, [sp, #96] 847 dup v20.4s, w14 848 mov v23.16b, v22.16b 849 mov v15.16b, v10.16b 850 eor v6.16b, v1.16b, v3.16b 851 ldr q3, [sp, #80] 852 add v1.4s, v1.4s, v28.4s 853 ldr q28, [sp, #272] 854 str q23, [sp, #128] 855 eor v7.16b, v2.16b, v3.16b 856 ldp q27, q3, [sp, #32] 857 add v2.4s, v2.4s, v22.4s 858 tbl v6.16b, { v6.16b }, v27.16b 859 tbl v7.16b, { v7.16b }, v27.16b 860 tbl v16.16b, { v16.16b }, v27.16b 861 tbl v0.16b, { v0.16b }, v27.16b 862 add v19.4s, v6.4s, v14.4s 863 add v21.4s, v7.4s, v3.4s 864 add v30.4s, v16.4s, v17.4s 865 add v31.4s, v0.4s, v20.4s 866 eor v24.16b, v19.16b, v24.16b 867 eor v17.16b, v21.16b, v18.16b 868 ushr v18.4s, v24.4s, #12 869 shl v20.4s, v24.4s, #20 870 eor v24.16b, v30.16b, v25.16b 871 eor v25.16b, v31.16b, v26.16b 872 ushr v26.4s, v17.4s, #12 873 shl v17.4s, v17.4s, #20 874 ushr v29.4s, v24.4s, #12 875 shl v24.4s, v24.4s, #20 876 ushr v8.4s, v25.4s, #12 877 shl v25.4s, v25.4s, #20 878 orr v3.16b, v20.16b, v18.16b 879 ldr q18, [x10, :lo12:.LCPI3_2] 880 orr v13.16b, v17.16b, v26.16b 881 orr v24.16b, v24.16b, v29.16b 882 orr v14.16b, 
v25.16b, v8.16b 883 add v8.4s, v1.4s, v3.4s 884 add v29.4s, v2.4s, v13.4s 885 add v17.4s, v4.4s, v24.4s 886 add v20.4s, v5.4s, v14.4s 887 eor v1.16b, v6.16b, v8.16b 888 eor v2.16b, v7.16b, v29.16b 889 eor v4.16b, v16.16b, v17.16b 890 eor v0.16b, v0.16b, v20.16b 891 tbl v25.16b, { v1.16b }, v18.16b 892 tbl v16.16b, { v2.16b }, v18.16b 893 tbl v6.16b, { v4.16b }, v18.16b 894 tbl v4.16b, { v0.16b }, v18.16b 895 add v19.4s, v19.4s, v25.4s 896 add v21.4s, v21.4s, v16.4s 897 add v26.4s, v30.4s, v6.4s 898 add v7.4s, v31.4s, v4.4s 899 eor v0.16b, v19.16b, v3.16b 900 eor v1.16b, v21.16b, v13.16b 901 eor v2.16b, v26.16b, v24.16b 902 eor v3.16b, v7.16b, v14.16b 903 ushr v5.4s, v0.4s, #7 904 shl v0.4s, v0.4s, #25 905 ushr v24.4s, v1.4s, #7 906 shl v1.4s, v1.4s, #25 907 ushr v30.4s, v2.4s, #7 908 shl v2.4s, v2.4s, #25 909 orr v5.16b, v0.16b, v5.16b 910 orr v0.16b, v1.16b, v24.16b 911 ushr v31.4s, v3.4s, #7 912 orr v2.16b, v2.16b, v30.16b 913 ldp q24, q30, [sp, #208] 914 shl v3.4s, v3.4s, #25 915 zip2 v14.2d, v12.2d, v9.2d 916 mov v22.16b, v24.16b 917 orr v1.16b, v3.16b, v31.16b 918 zip2 v3.2d, v24.2d, v30.2d 919 mov v24.16b, v28.16b 920 mov v22.d[1], v30.d[0] 921 ldr q30, [sp, #240] 922 mov v31.16b, v12.16b 923 stp q22, q14, [sp, #224] 924 mov v24.d[1], v30.d[0] 925 add v12.4s, v8.4s, v22.4s 926 mov v31.d[1], v9.d[0] 927 add v22.4s, v29.4s, v24.4s 928 ldr q29, [sp, #176] 929 zip2 v28.2d, v28.2d, v30.2d 930 mov v9.16b, v24.16b 931 mov v15.d[1], v29.d[0] 932 zip2 v8.2d, v10.2d, v29.2d 933 add v10.4s, v12.4s, v0.4s 934 add v22.4s, v22.4s, v2.4s 935 str q9, [sp, #144] 936 add v20.4s, v20.4s, v15.4s 937 add v17.4s, v17.4s, v31.4s 938 stp q3, q8, [sp, #192] 939 eor v4.16b, v4.16b, v10.16b 940 eor v25.16b, v25.16b, v22.16b 941 add v20.4s, v20.4s, v5.4s 942 add v17.4s, v17.4s, v1.4s 943 tbl v4.16b, { v4.16b }, v27.16b 944 tbl v25.16b, { v25.16b }, v27.16b 945 eor v6.16b, v6.16b, v20.16b 946 eor v16.16b, v16.16b, v17.16b 947 add v26.4s, v26.4s, v4.4s 948 add v7.4s, v7.4s, v25.4s 949 tbl 
v6.16b, { v6.16b }, v27.16b 950 tbl v16.16b, { v16.16b }, v27.16b 951 eor v0.16b, v26.16b, v0.16b 952 eor v2.16b, v7.16b, v2.16b 953 add v21.4s, v21.4s, v6.4s 954 add v19.4s, v19.4s, v16.4s 955 ushr v12.4s, v0.4s, #12 956 shl v0.4s, v0.4s, #20 957 ushr v13.4s, v2.4s, #12 958 shl v2.4s, v2.4s, #20 959 eor v5.16b, v21.16b, v5.16b 960 eor v1.16b, v19.16b, v1.16b 961 orr v0.16b, v0.16b, v12.16b 962 add v10.4s, v10.4s, v3.4s 963 orr v2.16b, v2.16b, v13.16b 964 ushr v13.4s, v5.4s, #12 965 shl v5.4s, v5.4s, #20 966 add v22.4s, v22.4s, v28.4s 967 ushr v12.4s, v1.4s, #12 968 shl v1.4s, v1.4s, #20 969 add v10.4s, v10.4s, v0.4s 970 orr v5.16b, v5.16b, v13.16b 971 add v22.4s, v22.4s, v2.4s 972 add v20.4s, v20.4s, v8.4s 973 orr v1.16b, v1.16b, v12.16b 974 add v17.4s, v17.4s, v14.4s 975 eor v4.16b, v4.16b, v10.16b 976 eor v25.16b, v25.16b, v22.16b 977 add v20.4s, v20.4s, v5.4s 978 add v17.4s, v17.4s, v1.4s 979 tbl v4.16b, { v4.16b }, v18.16b 980 tbl v25.16b, { v25.16b }, v18.16b 981 eor v6.16b, v6.16b, v20.16b 982 eor v16.16b, v16.16b, v17.16b 983 add v26.4s, v26.4s, v4.4s 984 add v7.4s, v7.4s, v25.4s 985 tbl v6.16b, { v6.16b }, v18.16b 986 tbl v16.16b, { v16.16b }, v18.16b 987 eor v0.16b, v26.16b, v0.16b 988 eor v2.16b, v7.16b, v2.16b 989 add v21.4s, v21.4s, v6.4s 990 add v19.4s, v19.4s, v16.4s 991 ushr v12.4s, v0.4s, #7 992 shl v0.4s, v0.4s, #25 993 ushr v13.4s, v2.4s, #7 994 shl v2.4s, v2.4s, #25 995 eor v5.16b, v21.16b, v5.16b 996 eor v1.16b, v19.16b, v1.16b 997 orr v0.16b, v0.16b, v12.16b 998 add v22.4s, v22.4s, v23.4s 999 orr v2.16b, v2.16b, v13.16b 1000 ushr v13.4s, v5.4s, #7 1001 shl v5.4s, v5.4s, #25 1002 add v17.4s, v17.4s, v11.4s 1003 mov v30.16b, v28.16b 1004 mov v28.16b, v23.16b 1005 ldr q23, [sp, #304] 1006 ushr v12.4s, v1.4s, #7 1007 shl v1.4s, v1.4s, #25 1008 add v22.4s, v22.4s, v0.4s 1009 mov v29.16b, v31.16b 1010 ldr q31, [sp, #160] 1011 orr v5.16b, v5.16b, v13.16b 1012 add v17.4s, v17.4s, v2.4s 1013 add v10.4s, v10.4s, v23.4s 1014 orr v1.16b, v1.16b, v12.16b 
1015 str q29, [sp, #272] 1016 eor v16.16b, v16.16b, v22.16b 1017 add v20.4s, v20.4s, v31.4s 1018 eor v6.16b, v6.16b, v17.16b 1019 add v10.4s, v10.4s, v5.4s 1020 tbl v16.16b, { v16.16b }, v27.16b 1021 add v20.4s, v20.4s, v1.4s 1022 tbl v6.16b, { v6.16b }, v27.16b 1023 eor v25.16b, v25.16b, v10.16b 1024 add v21.4s, v21.4s, v16.4s 1025 eor v4.16b, v4.16b, v20.16b 1026 add v26.4s, v26.4s, v6.4s 1027 tbl v25.16b, { v25.16b }, v27.16b 1028 eor v0.16b, v21.16b, v0.16b 1029 tbl v4.16b, { v4.16b }, v27.16b 1030 eor v2.16b, v26.16b, v2.16b 1031 add v19.4s, v19.4s, v25.4s 1032 ushr v12.4s, v0.4s, #12 1033 shl v0.4s, v0.4s, #20 1034 add v7.4s, v7.4s, v4.4s 1035 ushr v13.4s, v2.4s, #12 1036 shl v2.4s, v2.4s, #20 1037 eor v5.16b, v5.16b, v19.16b 1038 add v22.4s, v22.4s, v24.4s 1039 ldr q24, [sp, #320] 1040 orr v0.16b, v0.16b, v12.16b 1041 eor v1.16b, v7.16b, v1.16b 1042 orr v2.16b, v2.16b, v13.16b 1043 ushr v12.4s, v5.4s, #12 1044 shl v5.4s, v5.4s, #20 1045 add v17.4s, v17.4s, v24.4s 1046 ldr q24, [sp, #352] 1047 ushr v13.4s, v1.4s, #12 1048 shl v1.4s, v1.4s, #20 1049 add v22.4s, v22.4s, v0.4s 1050 orr v5.16b, v5.16b, v12.16b 1051 add v17.4s, v17.4s, v2.4s 1052 add v10.4s, v10.4s, v24.4s 1053 ldr q24, [sp, #336] 1054 orr v1.16b, v1.16b, v13.16b 1055 eor v16.16b, v16.16b, v22.16b 1056 add v20.4s, v20.4s, v14.4s 1057 eor v6.16b, v6.16b, v17.16b 1058 add v10.4s, v10.4s, v5.4s 1059 tbl v16.16b, { v16.16b }, v18.16b 1060 add v20.4s, v20.4s, v1.4s 1061 tbl v6.16b, { v6.16b }, v18.16b 1062 eor v25.16b, v25.16b, v10.16b 1063 add v21.4s, v21.4s, v16.4s 1064 eor v4.16b, v4.16b, v20.16b 1065 add v26.4s, v26.4s, v6.4s 1066 tbl v25.16b, { v25.16b }, v18.16b 1067 eor v0.16b, v21.16b, v0.16b 1068 tbl v4.16b, { v4.16b }, v18.16b 1069 eor v2.16b, v26.16b, v2.16b 1070 add v19.4s, v19.4s, v25.4s 1071 ushr v12.4s, v0.4s, #7 1072 shl v0.4s, v0.4s, #25 1073 add v7.4s, v7.4s, v4.4s 1074 ushr v13.4s, v2.4s, #7 1075 shl v2.4s, v2.4s, #25 1076 eor v5.16b, v19.16b, v5.16b 1077 orr v0.16b, v0.16b, v12.16b 
1078 eor v1.16b, v7.16b, v1.16b 1079 add v10.4s, v10.4s, v24.4s 1080 orr v2.16b, v2.16b, v13.16b 1081 ushr v12.4s, v5.4s, #7 1082 shl v5.4s, v5.4s, #25 1083 add v22.4s, v22.4s, v29.4s 1084 ushr v13.4s, v1.4s, #7 1085 shl v1.4s, v1.4s, #25 1086 add v10.4s, v10.4s, v0.4s 1087 orr v5.16b, v5.16b, v12.16b 1088 add v22.4s, v22.4s, v2.4s 1089 add v20.4s, v20.4s, v8.4s 1090 ldr q8, [sp, #288] 1091 orr v1.16b, v1.16b, v13.16b 1092 add v17.4s, v17.4s, v3.4s 1093 ldr q3, [sp, #352] 1094 eor v4.16b, v4.16b, v10.16b 1095 eor v25.16b, v25.16b, v22.16b 1096 add v20.4s, v20.4s, v5.4s 1097 add v17.4s, v17.4s, v1.4s 1098 tbl v4.16b, { v4.16b }, v27.16b 1099 tbl v25.16b, { v25.16b }, v27.16b 1100 eor v6.16b, v6.16b, v20.16b 1101 eor v16.16b, v16.16b, v17.16b 1102 add v26.4s, v26.4s, v4.4s 1103 add v7.4s, v7.4s, v25.4s 1104 tbl v6.16b, { v6.16b }, v27.16b 1105 tbl v16.16b, { v16.16b }, v27.16b 1106 eor v0.16b, v26.16b, v0.16b 1107 eor v2.16b, v7.16b, v2.16b 1108 add v21.4s, v21.4s, v6.4s 1109 add v19.4s, v19.4s, v16.4s 1110 ushr v12.4s, v0.4s, #12 1111 shl v0.4s, v0.4s, #20 1112 ushr v13.4s, v2.4s, #12 1113 shl v2.4s, v2.4s, #20 1114 eor v5.16b, v21.16b, v5.16b 1115 eor v1.16b, v19.16b, v1.16b 1116 orr v0.16b, v0.16b, v12.16b 1117 add v10.4s, v10.4s, v30.4s 1118 orr v2.16b, v2.16b, v13.16b 1119 ushr v13.4s, v5.4s, #12 1120 shl v5.4s, v5.4s, #20 1121 add v22.4s, v22.4s, v8.4s 1122 mov v24.16b, v30.16b 1123 mov v30.16b, v15.16b 1124 add v17.4s, v17.4s, v15.4s 1125 ldr q15, [sp, #224] 1126 ushr v12.4s, v1.4s, #12 1127 shl v1.4s, v1.4s, #20 1128 add v10.4s, v10.4s, v0.4s 1129 str q30, [sp, #176] 1130 orr v5.16b, v5.16b, v13.16b 1131 add v22.4s, v22.4s, v2.4s 1132 add v20.4s, v20.4s, v15.4s 1133 orr v1.16b, v1.16b, v12.16b 1134 eor v4.16b, v4.16b, v10.16b 1135 eor v25.16b, v25.16b, v22.16b 1136 add v20.4s, v20.4s, v5.4s 1137 add v17.4s, v17.4s, v1.4s 1138 tbl v4.16b, { v4.16b }, v18.16b 1139 tbl v25.16b, { v25.16b }, v18.16b 1140 eor v6.16b, v6.16b, v20.16b 1141 eor v16.16b, v16.16b, 
v17.16b 1142 add v26.4s, v26.4s, v4.4s 1143 add v7.4s, v7.4s, v25.4s 1144 tbl v6.16b, { v6.16b }, v18.16b 1145 tbl v16.16b, { v16.16b }, v18.16b 1146 eor v0.16b, v26.16b, v0.16b 1147 eor v2.16b, v7.16b, v2.16b 1148 add v21.4s, v21.4s, v6.4s 1149 add v19.4s, v19.4s, v16.4s 1150 ushr v12.4s, v0.4s, #7 1151 shl v0.4s, v0.4s, #25 1152 ushr v13.4s, v2.4s, #7 1153 shl v2.4s, v2.4s, #25 1154 eor v5.16b, v21.16b, v5.16b 1155 eor v1.16b, v19.16b, v1.16b 1156 orr v0.16b, v0.16b, v12.16b 1157 add v22.4s, v22.4s, v9.4s 1158 orr v2.16b, v2.16b, v13.16b 1159 ushr v13.4s, v5.4s, #7 1160 shl v5.4s, v5.4s, #25 1161 add v17.4s, v17.4s, v14.4s 1162 ushr v12.4s, v1.4s, #7 1163 shl v1.4s, v1.4s, #25 1164 add v22.4s, v22.4s, v0.4s 1165 orr v5.16b, v5.16b, v13.16b 1166 add v17.4s, v17.4s, v2.4s 1167 add v10.4s, v10.4s, v28.4s 1168 orr v1.16b, v1.16b, v12.16b 1169 eor v16.16b, v16.16b, v22.16b 1170 add v20.4s, v20.4s, v11.4s 1171 eor v6.16b, v6.16b, v17.16b 1172 add v10.4s, v10.4s, v5.4s 1173 tbl v16.16b, { v16.16b }, v27.16b 1174 add v20.4s, v20.4s, v1.4s 1175 tbl v6.16b, { v6.16b }, v27.16b 1176 eor v25.16b, v25.16b, v10.16b 1177 add v21.4s, v21.4s, v16.4s 1178 eor v4.16b, v4.16b, v20.16b 1179 add v26.4s, v26.4s, v6.4s 1180 tbl v25.16b, { v25.16b }, v27.16b 1181 eor v0.16b, v21.16b, v0.16b 1182 tbl v4.16b, { v4.16b }, v27.16b 1183 eor v2.16b, v26.16b, v2.16b 1184 add v19.4s, v19.4s, v25.4s 1185 ushr v12.4s, v0.4s, #12 1186 shl v0.4s, v0.4s, #20 1187 add v7.4s, v7.4s, v4.4s 1188 ushr v13.4s, v2.4s, #12 1189 shl v2.4s, v2.4s, #20 1190 eor v5.16b, v5.16b, v19.16b 1191 orr v0.16b, v0.16b, v12.16b 1192 eor v1.16b, v7.16b, v1.16b 1193 add v22.4s, v22.4s, v29.4s 1194 orr v2.16b, v2.16b, v13.16b 1195 ushr v12.4s, v5.4s, #12 1196 shl v5.4s, v5.4s, #20 1197 add v17.4s, v17.4s, v23.4s 1198 ushr v13.4s, v1.4s, #12 1199 shl v1.4s, v1.4s, #20 1200 add v22.4s, v22.4s, v0.4s 1201 orr v5.16b, v5.16b, v12.16b 1202 add v17.4s, v17.4s, v2.4s 1203 add v10.4s, v10.4s, v31.4s 1204 orr v1.16b, v1.16b, v13.16b 
1205 eor v16.16b, v16.16b, v22.16b 1206 add v20.4s, v20.4s, v30.4s 1207 eor v6.16b, v6.16b, v17.16b 1208 add v10.4s, v10.4s, v5.4s 1209 tbl v16.16b, { v16.16b }, v18.16b 1210 add v20.4s, v20.4s, v1.4s 1211 tbl v6.16b, { v6.16b }, v18.16b 1212 eor v25.16b, v25.16b, v10.16b 1213 add v21.4s, v21.4s, v16.4s 1214 eor v4.16b, v4.16b, v20.16b 1215 add v26.4s, v26.4s, v6.4s 1216 tbl v25.16b, { v25.16b }, v18.16b 1217 eor v0.16b, v21.16b, v0.16b 1218 tbl v4.16b, { v4.16b }, v18.16b 1219 eor v2.16b, v26.16b, v2.16b 1220 add v19.4s, v19.4s, v25.4s 1221 ushr v12.4s, v0.4s, #7 1222 shl v0.4s, v0.4s, #25 1223 add v7.4s, v7.4s, v4.4s 1224 ushr v13.4s, v2.4s, #7 1225 shl v2.4s, v2.4s, #25 1226 eor v5.16b, v19.16b, v5.16b 1227 add v10.4s, v10.4s, v3.4s 1228 ldr q3, [sp, #192] 1229 orr v0.16b, v0.16b, v12.16b 1230 eor v1.16b, v7.16b, v1.16b 1231 orr v2.16b, v2.16b, v13.16b 1232 ushr v12.4s, v5.4s, #7 1233 shl v5.4s, v5.4s, #25 1234 add v22.4s, v22.4s, v3.4s 1235 ushr v13.4s, v1.4s, #7 1236 shl v1.4s, v1.4s, #25 1237 add v10.4s, v10.4s, v0.4s 1238 orr v5.16b, v5.16b, v12.16b 1239 add v22.4s, v22.4s, v2.4s 1240 add v20.4s, v20.4s, v15.4s 1241 ldr q15, [sp, #128] 1242 orr v1.16b, v1.16b, v13.16b 1243 add v17.4s, v17.4s, v24.4s 1244 eor v4.16b, v4.16b, v10.16b 1245 eor v25.16b, v25.16b, v22.16b 1246 add v20.4s, v20.4s, v5.4s 1247 add v17.4s, v17.4s, v1.4s 1248 tbl v4.16b, { v4.16b }, v27.16b 1249 tbl v25.16b, { v25.16b }, v27.16b 1250 eor v6.16b, v6.16b, v20.16b 1251 eor v16.16b, v16.16b, v17.16b 1252 add v26.4s, v26.4s, v4.4s 1253 add v7.4s, v7.4s, v25.4s 1254 tbl v6.16b, { v6.16b }, v27.16b 1255 tbl v16.16b, { v16.16b }, v27.16b 1256 eor v0.16b, v26.16b, v0.16b 1257 eor v2.16b, v7.16b, v2.16b 1258 add v21.4s, v21.4s, v6.4s 1259 add v19.4s, v19.4s, v16.4s 1260 ushr v12.4s, v0.4s, #12 1261 shl v0.4s, v0.4s, #20 1262 ushr v13.4s, v2.4s, #12 1263 shl v2.4s, v2.4s, #20 1264 eor v5.16b, v21.16b, v5.16b 1265 ldp q23, q11, [sp, #320] 1266 eor v1.16b, v19.16b, v1.16b 1267 orr v0.16b, v0.16b, 
v12.16b 1268 add v10.4s, v10.4s, v8.4s 1269 orr v2.16b, v2.16b, v13.16b 1270 ushr v13.4s, v5.4s, #12 1271 shl v5.4s, v5.4s, #20 1272 add v22.4s, v22.4s, v23.4s 1273 ushr v12.4s, v1.4s, #12 1274 shl v1.4s, v1.4s, #20 1275 add v10.4s, v10.4s, v0.4s 1276 mov v28.16b, v31.16b 1277 mov v31.16b, v8.16b 1278 ldr q8, [sp, #208] 1279 orr v5.16b, v5.16b, v13.16b 1280 add v22.4s, v22.4s, v2.4s 1281 add v20.4s, v20.4s, v11.4s 1282 orr v1.16b, v1.16b, v12.16b 1283 add v17.4s, v17.4s, v8.4s 1284 eor v4.16b, v4.16b, v10.16b 1285 eor v25.16b, v25.16b, v22.16b 1286 add v20.4s, v20.4s, v5.4s 1287 add v17.4s, v17.4s, v1.4s 1288 tbl v4.16b, { v4.16b }, v18.16b 1289 tbl v25.16b, { v25.16b }, v18.16b 1290 eor v6.16b, v6.16b, v20.16b 1291 eor v16.16b, v16.16b, v17.16b 1292 add v26.4s, v26.4s, v4.4s 1293 add v7.4s, v7.4s, v25.4s 1294 tbl v6.16b, { v6.16b }, v18.16b 1295 tbl v16.16b, { v16.16b }, v18.16b 1296 eor v0.16b, v26.16b, v0.16b 1297 eor v2.16b, v7.16b, v2.16b 1298 add v21.4s, v21.4s, v6.4s 1299 add v19.4s, v19.4s, v16.4s 1300 ushr v12.4s, v0.4s, #7 1301 shl v0.4s, v0.4s, #25 1302 ushr v13.4s, v2.4s, #7 1303 shl v2.4s, v2.4s, #25 1304 eor v5.16b, v21.16b, v5.16b 1305 eor v1.16b, v19.16b, v1.16b 1306 orr v0.16b, v0.16b, v12.16b 1307 add v22.4s, v22.4s, v29.4s 1308 orr v2.16b, v2.16b, v13.16b 1309 ushr v13.4s, v5.4s, #7 1310 shl v5.4s, v5.4s, #25 1311 add v17.4s, v17.4s, v30.4s 1312 ushr v12.4s, v1.4s, #7 1313 shl v1.4s, v1.4s, #25 1314 add v22.4s, v22.4s, v0.4s 1315 orr v5.16b, v5.16b, v13.16b 1316 add v17.4s, v17.4s, v2.4s 1317 add v10.4s, v10.4s, v9.4s 1318 orr v1.16b, v1.16b, v12.16b 1319 eor v16.16b, v16.16b, v22.16b 1320 add v20.4s, v20.4s, v14.4s 1321 ldr q14, [sp, #256] 1322 eor v6.16b, v6.16b, v17.16b 1323 add v10.4s, v10.4s, v5.4s 1324 tbl v16.16b, { v16.16b }, v27.16b 1325 add v20.4s, v20.4s, v1.4s 1326 tbl v6.16b, { v6.16b }, v27.16b 1327 eor v25.16b, v25.16b, v10.16b 1328 add v21.4s, v21.4s, v16.4s 1329 eor v4.16b, v4.16b, v20.16b 1330 add v26.4s, v26.4s, v6.4s 1331 tbl 
v25.16b, { v25.16b }, v27.16b 1332 eor v0.16b, v21.16b, v0.16b 1333 tbl v4.16b, { v4.16b }, v27.16b 1334 eor v2.16b, v26.16b, v2.16b 1335 add v19.4s, v19.4s, v25.4s 1336 ushr v12.4s, v0.4s, #12 1337 shl v0.4s, v0.4s, #20 1338 add v7.4s, v7.4s, v4.4s 1339 ushr v13.4s, v2.4s, #12 1340 shl v2.4s, v2.4s, #20 1341 eor v5.16b, v5.16b, v19.16b 1342 orr v0.16b, v0.16b, v12.16b 1343 eor v1.16b, v7.16b, v1.16b 1344 add v22.4s, v22.4s, v3.4s 1345 orr v2.16b, v2.16b, v13.16b 1346 ushr v12.4s, v5.4s, #12 1347 shl v5.4s, v5.4s, #20 1348 add v17.4s, v17.4s, v15.4s 1349 ushr v13.4s, v1.4s, #12 1350 shl v1.4s, v1.4s, #20 1351 add v22.4s, v22.4s, v0.4s 1352 orr v5.16b, v5.16b, v12.16b 1353 add v17.4s, v17.4s, v2.4s 1354 add v10.4s, v10.4s, v14.4s 1355 orr v1.16b, v1.16b, v13.16b 1356 eor v16.16b, v16.16b, v22.16b 1357 add v20.4s, v20.4s, v8.4s 1358 eor v6.16b, v6.16b, v17.16b 1359 add v10.4s, v10.4s, v5.4s 1360 tbl v16.16b, { v16.16b }, v18.16b 1361 add v20.4s, v20.4s, v1.4s 1362 tbl v6.16b, { v6.16b }, v18.16b 1363 eor v25.16b, v25.16b, v10.16b 1364 add v21.4s, v21.4s, v16.4s 1365 eor v4.16b, v4.16b, v20.16b 1366 add v26.4s, v26.4s, v6.4s 1367 tbl v25.16b, { v25.16b }, v18.16b 1368 eor v0.16b, v21.16b, v0.16b 1369 tbl v4.16b, { v4.16b }, v18.16b 1370 eor v2.16b, v26.16b, v2.16b 1371 add v19.4s, v19.4s, v25.4s 1372 ushr v12.4s, v0.4s, #7 1373 shl v0.4s, v0.4s, #25 1374 add v7.4s, v7.4s, v4.4s 1375 ushr v13.4s, v2.4s, #7 1376 shl v2.4s, v2.4s, #25 1377 eor v5.16b, v19.16b, v5.16b 1378 orr v0.16b, v0.16b, v12.16b 1379 eor v1.16b, v7.16b, v1.16b 1380 add v10.4s, v10.4s, v28.4s 1381 orr v2.16b, v2.16b, v13.16b 1382 ushr v12.4s, v5.4s, #7 1383 shl v5.4s, v5.4s, #25 1384 add v22.4s, v22.4s, v24.4s 1385 ushr v13.4s, v1.4s, #7 1386 shl v1.4s, v1.4s, #25 1387 add v10.4s, v10.4s, v0.4s 1388 orr v5.16b, v5.16b, v12.16b 1389 add v22.4s, v22.4s, v2.4s 1390 add v20.4s, v20.4s, v11.4s 1391 ldr q11, [sp, #304] 1392 orr v1.16b, v1.16b, v13.16b 1393 add v17.4s, v17.4s, v31.4s 1394 ldr q31, [sp, #224] 
1395 eor v4.16b, v4.16b, v10.16b 1396 eor v25.16b, v25.16b, v22.16b 1397 add v20.4s, v20.4s, v5.4s 1398 add v17.4s, v17.4s, v1.4s 1399 tbl v4.16b, { v4.16b }, v27.16b 1400 tbl v25.16b, { v25.16b }, v27.16b 1401 eor v6.16b, v6.16b, v20.16b 1402 eor v16.16b, v16.16b, v17.16b 1403 add v26.4s, v26.4s, v4.4s 1404 add v7.4s, v7.4s, v25.4s 1405 tbl v6.16b, { v6.16b }, v27.16b 1406 tbl v16.16b, { v16.16b }, v27.16b 1407 eor v0.16b, v26.16b, v0.16b 1408 eor v2.16b, v7.16b, v2.16b 1409 add v21.4s, v21.4s, v6.4s 1410 add v19.4s, v19.4s, v16.4s 1411 ushr v12.4s, v0.4s, #12 1412 shl v0.4s, v0.4s, #20 1413 ushr v13.4s, v2.4s, #12 1414 shl v2.4s, v2.4s, #20 1415 eor v5.16b, v21.16b, v5.16b 1416 eor v1.16b, v19.16b, v1.16b 1417 orr v0.16b, v0.16b, v12.16b 1418 add v10.4s, v10.4s, v23.4s 1419 ldr q23, [sp, #240] 1420 orr v2.16b, v2.16b, v13.16b 1421 ushr v13.4s, v5.4s, #12 1422 shl v5.4s, v5.4s, #20 1423 add v22.4s, v22.4s, v11.4s 1424 mov v30.16b, v8.16b 1425 mov v8.16b, v24.16b 1426 ldr q24, [sp, #352] 1427 ushr v12.4s, v1.4s, #12 1428 shl v1.4s, v1.4s, #20 1429 add v10.4s, v10.4s, v0.4s 1430 orr v5.16b, v5.16b, v13.16b 1431 str q8, [sp, #112] 1432 add v22.4s, v22.4s, v2.4s 1433 add v20.4s, v20.4s, v24.4s 1434 orr v1.16b, v1.16b, v12.16b 1435 add v17.4s, v17.4s, v31.4s 1436 eor v4.16b, v4.16b, v10.16b 1437 eor v25.16b, v25.16b, v22.16b 1438 add v20.4s, v20.4s, v5.4s 1439 add v17.4s, v17.4s, v1.4s 1440 tbl v4.16b, { v4.16b }, v18.16b 1441 tbl v25.16b, { v25.16b }, v18.16b 1442 eor v6.16b, v6.16b, v20.16b 1443 eor v16.16b, v16.16b, v17.16b 1444 add v26.4s, v26.4s, v4.4s 1445 add v7.4s, v7.4s, v25.4s 1446 tbl v6.16b, { v6.16b }, v18.16b 1447 tbl v16.16b, { v16.16b }, v18.16b 1448 eor v0.16b, v26.16b, v0.16b 1449 eor v2.16b, v7.16b, v2.16b 1450 add v21.4s, v21.4s, v6.4s 1451 mov v29.16b, v3.16b 1452 add v19.4s, v19.4s, v16.4s 1453 ushr v12.4s, v0.4s, #7 1454 shl v0.4s, v0.4s, #25 1455 ushr v13.4s, v2.4s, #7 1456 shl v2.4s, v2.4s, #25 1457 eor v5.16b, v21.16b, v5.16b 1458 eor v1.16b, 
v19.16b, v1.16b 1459 orr v0.16b, v0.16b, v12.16b 1460 add v22.4s, v22.4s, v29.4s 1461 orr v2.16b, v2.16b, v13.16b 1462 ushr v13.4s, v5.4s, #7 1463 shl v5.4s, v5.4s, #25 1464 add v17.4s, v17.4s, v30.4s 1465 ldr q30, [sp, #272] 1466 ushr v12.4s, v1.4s, #7 1467 shl v1.4s, v1.4s, #25 1468 add v22.4s, v22.4s, v0.4s 1469 mov v3.16b, v28.16b 1470 ldr q28, [sp, #176] 1471 orr v5.16b, v5.16b, v13.16b 1472 add v17.4s, v17.4s, v2.4s 1473 add v10.4s, v10.4s, v30.4s 1474 orr v1.16b, v1.16b, v12.16b 1475 eor v16.16b, v16.16b, v22.16b 1476 add v20.4s, v20.4s, v28.4s 1477 eor v6.16b, v6.16b, v17.16b 1478 add v10.4s, v10.4s, v5.4s 1479 tbl v16.16b, { v16.16b }, v27.16b 1480 add v20.4s, v20.4s, v1.4s 1481 tbl v6.16b, { v6.16b }, v27.16b 1482 eor v25.16b, v25.16b, v10.16b 1483 add v21.4s, v21.4s, v16.4s 1484 eor v4.16b, v4.16b, v20.16b 1485 add v26.4s, v26.4s, v6.4s 1486 tbl v25.16b, { v25.16b }, v27.16b 1487 eor v0.16b, v21.16b, v0.16b 1488 tbl v4.16b, { v4.16b }, v27.16b 1489 eor v2.16b, v26.16b, v2.16b 1490 add v19.4s, v19.4s, v25.4s 1491 ushr v12.4s, v0.4s, #12 1492 shl v0.4s, v0.4s, #20 1493 add v7.4s, v7.4s, v4.4s 1494 ushr v13.4s, v2.4s, #12 1495 shl v2.4s, v2.4s, #20 1496 eor v5.16b, v5.16b, v19.16b 1497 orr v0.16b, v0.16b, v12.16b 1498 eor v1.16b, v7.16b, v1.16b 1499 add v22.4s, v22.4s, v8.4s 1500 orr v2.16b, v2.16b, v13.16b 1501 ushr v12.4s, v5.4s, #12 1502 shl v5.4s, v5.4s, #20 1503 add v17.4s, v17.4s, v9.4s 1504 ldr q9, [sp, #320] 1505 ushr v13.4s, v1.4s, #12 1506 shl v1.4s, v1.4s, #20 1507 add v22.4s, v22.4s, v0.4s 1508 orr v5.16b, v5.16b, v12.16b 1509 add v17.4s, v17.4s, v2.4s 1510 add v10.4s, v10.4s, v23.4s 1511 orr v1.16b, v1.16b, v13.16b 1512 eor v16.16b, v16.16b, v22.16b 1513 add v20.4s, v20.4s, v31.4s 1514 eor v6.16b, v6.16b, v17.16b 1515 add v10.4s, v10.4s, v5.4s 1516 tbl v16.16b, { v16.16b }, v18.16b 1517 add v20.4s, v20.4s, v1.4s 1518 tbl v6.16b, { v6.16b }, v18.16b 1519 eor v25.16b, v25.16b, v10.16b 1520 add v21.4s, v21.4s, v16.4s 1521 eor v4.16b, v4.16b, 
v20.16b 1522 add v26.4s, v26.4s, v6.4s 1523 tbl v25.16b, { v25.16b }, v18.16b 1524 eor v0.16b, v21.16b, v0.16b 1525 tbl v4.16b, { v4.16b }, v18.16b 1526 eor v2.16b, v26.16b, v2.16b 1527 add v19.4s, v19.4s, v25.4s 1528 ushr v12.4s, v0.4s, #7 1529 shl v0.4s, v0.4s, #25 1530 add v7.4s, v7.4s, v4.4s 1531 ushr v13.4s, v2.4s, #7 1532 shl v2.4s, v2.4s, #25 1533 eor v5.16b, v19.16b, v5.16b 1534 add v10.4s, v10.4s, v14.4s 1535 ldr q14, [sp, #288] 1536 orr v0.16b, v0.16b, v12.16b 1537 eor v1.16b, v7.16b, v1.16b 1538 orr v2.16b, v2.16b, v13.16b 1539 ushr v12.4s, v5.4s, #7 1540 shl v5.4s, v5.4s, #25 1541 add v22.4s, v22.4s, v14.4s 1542 ushr v13.4s, v1.4s, #7 1543 shl v1.4s, v1.4s, #25 1544 add v10.4s, v10.4s, v0.4s 1545 orr v5.16b, v5.16b, v12.16b 1546 add v22.4s, v22.4s, v2.4s 1547 add v20.4s, v20.4s, v24.4s 1548 orr v1.16b, v1.16b, v13.16b 1549 eor v4.16b, v4.16b, v10.16b 1550 add v17.4s, v17.4s, v9.4s 1551 eor v25.16b, v25.16b, v22.16b 1552 add v20.4s, v20.4s, v5.4s 1553 tbl v4.16b, { v4.16b }, v27.16b 1554 add v17.4s, v17.4s, v1.4s 1555 tbl v25.16b, { v25.16b }, v27.16b 1556 eor v6.16b, v6.16b, v20.16b 1557 add v26.4s, v26.4s, v4.4s 1558 eor v16.16b, v16.16b, v17.16b 1559 add v7.4s, v7.4s, v25.4s 1560 tbl v6.16b, { v6.16b }, v27.16b 1561 eor v0.16b, v26.16b, v0.16b 1562 tbl v16.16b, { v16.16b }, v27.16b 1563 eor v2.16b, v7.16b, v2.16b 1564 add v21.4s, v21.4s, v6.4s 1565 ushr v12.4s, v0.4s, #12 1566 shl v0.4s, v0.4s, #20 1567 add v19.4s, v19.4s, v16.4s 1568 ushr v13.4s, v2.4s, #12 1569 shl v2.4s, v2.4s, #20 1570 eor v5.16b, v21.16b, v5.16b 1571 orr v0.16b, v0.16b, v12.16b 1572 eor v1.16b, v19.16b, v1.16b 1573 add v10.4s, v10.4s, v11.4s 1574 orr v2.16b, v2.16b, v13.16b 1575 ushr v13.4s, v5.4s, #12 1576 shl v5.4s, v5.4s, #20 1577 ushr v12.4s, v1.4s, #12 1578 shl v1.4s, v1.4s, #20 1579 add v10.4s, v10.4s, v0.4s 1580 add v22.4s, v22.4s, v15.4s 1581 orr v5.16b, v5.16b, v13.16b 1582 add v20.4s, v20.4s, v3.4s 1583 mov v24.16b, v3.16b 1584 ldr q3, [sp, #336] 1585 orr v1.16b, 
v1.16b, v12.16b 1586 eor v4.16b, v4.16b, v10.16b 1587 add v22.4s, v22.4s, v2.4s 1588 add v17.4s, v17.4s, v3.4s 1589 add v20.4s, v20.4s, v5.4s 1590 tbl v4.16b, { v4.16b }, v18.16b 1591 eor v25.16b, v25.16b, v22.16b 1592 add v17.4s, v17.4s, v1.4s 1593 eor v6.16b, v6.16b, v20.16b 1594 add v26.4s, v26.4s, v4.4s 1595 tbl v25.16b, { v25.16b }, v18.16b 1596 eor v16.16b, v16.16b, v17.16b 1597 tbl v6.16b, { v6.16b }, v18.16b 1598 eor v0.16b, v26.16b, v0.16b 1599 add v7.4s, v7.4s, v25.4s 1600 tbl v16.16b, { v16.16b }, v18.16b 1601 add v21.4s, v21.4s, v6.4s 1602 ushr v12.4s, v0.4s, #7 1603 shl v0.4s, v0.4s, #25 1604 eor v2.16b, v7.16b, v2.16b 1605 add v19.4s, v19.4s, v16.4s 1606 eor v5.16b, v21.16b, v5.16b 1607 orr v0.16b, v0.16b, v12.16b 1608 ushr v12.4s, v2.4s, #7 1609 shl v2.4s, v2.4s, #25 1610 eor v1.16b, v19.16b, v1.16b 1611 ushr v13.4s, v5.4s, #7 1612 shl v5.4s, v5.4s, #25 1613 add v22.4s, v22.4s, v8.4s 1614 orr v2.16b, v2.16b, v12.16b 1615 ushr v12.4s, v1.4s, #7 1616 shl v1.4s, v1.4s, #25 1617 orr v5.16b, v5.16b, v13.16b 1618 add v22.4s, v22.4s, v0.4s 1619 add v10.4s, v10.4s, v29.4s 1620 ldr q29, [sp, #208] 1621 add v17.4s, v17.4s, v31.4s 1622 orr v1.16b, v1.16b, v12.16b 1623 add v20.4s, v20.4s, v29.4s 1624 eor v16.16b, v16.16b, v22.16b 1625 add v10.4s, v10.4s, v5.4s 1626 add v17.4s, v17.4s, v2.4s 1627 add v20.4s, v20.4s, v1.4s 1628 tbl v16.16b, { v16.16b }, v27.16b 1629 eor v25.16b, v25.16b, v10.16b 1630 eor v6.16b, v6.16b, v17.16b 1631 eor v4.16b, v4.16b, v20.16b 1632 add v21.4s, v21.4s, v16.4s 1633 tbl v25.16b, { v25.16b }, v27.16b 1634 tbl v6.16b, { v6.16b }, v27.16b 1635 tbl v4.16b, { v4.16b }, v27.16b 1636 eor v0.16b, v21.16b, v0.16b 1637 add v19.4s, v19.4s, v25.4s 1638 add v26.4s, v26.4s, v6.4s 1639 add v7.4s, v7.4s, v4.4s 1640 ushr v12.4s, v0.4s, #12 1641 shl v0.4s, v0.4s, #20 1642 eor v5.16b, v5.16b, v19.16b 1643 eor v2.16b, v26.16b, v2.16b 1644 eor v1.16b, v7.16b, v1.16b 1645 orr v0.16b, v0.16b, v12.16b 1646 ushr v12.4s, v5.4s, #12 1647 shl v5.4s, v5.4s, #20 
1648 add v22.4s, v22.4s, v14.4s 1649 mov v8.16b, v31.16b 1650 ushr v13.4s, v2.4s, #12 1651 shl v2.4s, v2.4s, #20 1652 mov v31.16b, v14.16b 1653 ushr v14.4s, v1.4s, #12 1654 shl v1.4s, v1.4s, #20 1655 orr v5.16b, v5.16b, v12.16b 1656 add v22.4s, v22.4s, v0.4s 1657 add v10.4s, v10.4s, v28.4s 1658 ldr q28, [sp, #352] 1659 orr v2.16b, v2.16b, v13.16b 1660 orr v1.16b, v1.16b, v14.16b 1661 add v17.4s, v17.4s, v30.4s 1662 add v20.4s, v20.4s, v3.4s 1663 eor v16.16b, v16.16b, v22.16b 1664 add v10.4s, v10.4s, v5.4s 1665 add v17.4s, v17.4s, v2.4s 1666 add v20.4s, v20.4s, v1.4s 1667 tbl v16.16b, { v16.16b }, v18.16b 1668 eor v25.16b, v25.16b, v10.16b 1669 eor v6.16b, v6.16b, v17.16b 1670 eor v4.16b, v4.16b, v20.16b 1671 add v21.4s, v21.4s, v16.4s 1672 tbl v25.16b, { v25.16b }, v18.16b 1673 tbl v6.16b, { v6.16b }, v18.16b 1674 tbl v4.16b, { v4.16b }, v18.16b 1675 eor v0.16b, v21.16b, v0.16b 1676 add v19.4s, v19.4s, v25.4s 1677 add v26.4s, v26.4s, v6.4s 1678 add v7.4s, v7.4s, v4.4s 1679 ushr v12.4s, v0.4s, #7 1680 shl v0.4s, v0.4s, #25 1681 eor v5.16b, v19.16b, v5.16b 1682 eor v2.16b, v26.16b, v2.16b 1683 eor v1.16b, v7.16b, v1.16b 1684 orr v0.16b, v0.16b, v12.16b 1685 ushr v12.4s, v5.4s, #7 1686 shl v5.4s, v5.4s, #25 1687 add v10.4s, v10.4s, v23.4s 1688 ushr v13.4s, v2.4s, #7 1689 shl v2.4s, v2.4s, #25 1690 ushr v14.4s, v1.4s, #7 1691 shl v1.4s, v1.4s, #25 1692 orr v5.16b, v5.16b, v12.16b 1693 add v10.4s, v10.4s, v0.4s 1694 add v20.4s, v20.4s, v24.4s 1695 ldr q24, [sp, #144] 1696 orr v2.16b, v2.16b, v13.16b 1697 orr v1.16b, v1.16b, v14.16b 1698 add v22.4s, v22.4s, v9.4s 1699 add v17.4s, v17.4s, v11.4s 1700 eor v4.16b, v4.16b, v10.16b 1701 add v20.4s, v20.4s, v5.4s 1702 add v22.4s, v22.4s, v2.4s 1703 add v17.4s, v17.4s, v1.4s 1704 tbl v4.16b, { v4.16b }, v27.16b 1705 eor v6.16b, v6.16b, v20.16b 1706 eor v25.16b, v25.16b, v22.16b 1707 eor v16.16b, v16.16b, v17.16b 1708 add v26.4s, v26.4s, v4.4s 1709 tbl v6.16b, { v6.16b }, v27.16b 1710 tbl v25.16b, { v25.16b }, v27.16b 1711 tbl 
v16.16b, { v16.16b }, v27.16b 1712 eor v0.16b, v26.16b, v0.16b 1713 add v21.4s, v21.4s, v6.4s 1714 add v7.4s, v7.4s, v25.4s 1715 add v19.4s, v19.4s, v16.4s 1716 ushr v12.4s, v0.4s, #12 1717 shl v0.4s, v0.4s, #20 1718 eor v5.16b, v21.16b, v5.16b 1719 eor v2.16b, v7.16b, v2.16b 1720 eor v1.16b, v19.16b, v1.16b 1721 orr v0.16b, v0.16b, v12.16b 1722 add v10.4s, v10.4s, v15.4s 1723 ushr v14.4s, v5.4s, #12 1724 shl v5.4s, v5.4s, #20 1725 mov v30.16b, v3.16b 1726 ldr q3, [sp, #256] 1727 ushr v12.4s, v2.4s, #12 1728 shl v2.4s, v2.4s, #20 1729 ushr v13.4s, v1.4s, #12 1730 shl v1.4s, v1.4s, #20 1731 add v10.4s, v10.4s, v0.4s 1732 orr v5.16b, v5.16b, v14.16b 1733 add v20.4s, v20.4s, v3.4s 1734 orr v2.16b, v2.16b, v12.16b 1735 orr v1.16b, v1.16b, v13.16b 1736 add v22.4s, v22.4s, v24.4s 1737 add v17.4s, v17.4s, v28.4s 1738 eor v4.16b, v4.16b, v10.16b 1739 add v20.4s, v20.4s, v5.4s 1740 add v22.4s, v22.4s, v2.4s 1741 add v17.4s, v17.4s, v1.4s 1742 tbl v4.16b, { v4.16b }, v18.16b 1743 eor v6.16b, v6.16b, v20.16b 1744 eor v25.16b, v25.16b, v22.16b 1745 eor v16.16b, v16.16b, v17.16b 1746 add v26.4s, v26.4s, v4.4s 1747 tbl v6.16b, { v6.16b }, v18.16b 1748 tbl v25.16b, { v25.16b }, v18.16b 1749 tbl v16.16b, { v16.16b }, v18.16b 1750 eor v0.16b, v26.16b, v0.16b 1751 add v21.4s, v21.4s, v6.4s 1752 add v7.4s, v7.4s, v25.4s 1753 add v19.4s, v19.4s, v16.4s 1754 ushr v12.4s, v0.4s, #7 1755 shl v0.4s, v0.4s, #25 1756 eor v5.16b, v21.16b, v5.16b 1757 eor v2.16b, v7.16b, v2.16b 1758 eor v1.16b, v19.16b, v1.16b 1759 orr v0.16b, v0.16b, v12.16b 1760 ushr v12.4s, v5.4s, #7 1761 shl v5.4s, v5.4s, #25 1762 mov v23.16b, v9.16b 1763 ldr q9, [sp, #112] 1764 ushr v13.4s, v2.4s, #7 1765 shl v2.4s, v2.4s, #25 1766 ushr v14.4s, v1.4s, #7 1767 shl v1.4s, v1.4s, #25 1768 orr v5.16b, v5.16b, v12.16b 1769 add v9.4s, v10.4s, v9.4s 1770 orr v2.16b, v2.16b, v13.16b 1771 orr v1.16b, v1.16b, v14.16b 1772 ldr q14, [sp, #64] 1773 add v22.4s, v22.4s, v31.4s 1774 add v17.4s, v17.4s, v30.4s 1775 add v20.4s, v20.4s, 
v8.4s 1776 add v9.4s, v9.4s, v5.4s 1777 add v22.4s, v22.4s, v0.4s 1778 add v17.4s, v17.4s, v2.4s 1779 add v20.4s, v20.4s, v1.4s 1780 eor v25.16b, v25.16b, v9.16b 1781 eor v16.16b, v16.16b, v22.16b 1782 eor v6.16b, v6.16b, v17.16b 1783 eor v4.16b, v4.16b, v20.16b 1784 tbl v25.16b, { v25.16b }, v27.16b 1785 tbl v16.16b, { v16.16b }, v27.16b 1786 tbl v6.16b, { v6.16b }, v27.16b 1787 tbl v4.16b, { v4.16b }, v27.16b 1788 add v19.4s, v19.4s, v25.4s 1789 add v21.4s, v21.4s, v16.4s 1790 add v26.4s, v26.4s, v6.4s 1791 add v7.4s, v7.4s, v4.4s 1792 eor v5.16b, v5.16b, v19.16b 1793 eor v0.16b, v21.16b, v0.16b 1794 eor v2.16b, v26.16b, v2.16b 1795 eor v1.16b, v7.16b, v1.16b 1796 ushr v30.4s, v5.4s, #12 1797 shl v5.4s, v5.4s, #20 1798 ushr v10.4s, v0.4s, #12 1799 shl v0.4s, v0.4s, #20 1800 ushr v12.4s, v2.4s, #12 1801 shl v2.4s, v2.4s, #20 1802 ushr v13.4s, v1.4s, #12 1803 shl v1.4s, v1.4s, #20 1804 orr v5.16b, v5.16b, v30.16b 1805 add v30.4s, v9.4s, v29.4s 1806 add v22.4s, v22.4s, v23.4s 1807 ldr q23, [sp, #192] 1808 orr v0.16b, v0.16b, v10.16b 1809 orr v2.16b, v2.16b, v12.16b 1810 orr v1.16b, v1.16b, v13.16b 1811 add v17.4s, v17.4s, v23.4s 1812 add v20.4s, v20.4s, v28.4s 1813 add v23.4s, v30.4s, v5.4s 1814 add v22.4s, v22.4s, v0.4s 1815 add v17.4s, v17.4s, v2.4s 1816 add v20.4s, v20.4s, v1.4s 1817 eor v25.16b, v25.16b, v23.16b 1818 eor v16.16b, v16.16b, v22.16b 1819 eor v6.16b, v6.16b, v17.16b 1820 eor v4.16b, v4.16b, v20.16b 1821 tbl v25.16b, { v25.16b }, v18.16b 1822 tbl v16.16b, { v16.16b }, v18.16b 1823 tbl v6.16b, { v6.16b }, v18.16b 1824 tbl v4.16b, { v4.16b }, v18.16b 1825 add v19.4s, v19.4s, v25.4s 1826 add v21.4s, v21.4s, v16.4s 1827 add v26.4s, v26.4s, v6.4s 1828 add v7.4s, v7.4s, v4.4s 1829 eor v5.16b, v19.16b, v5.16b 1830 eor v0.16b, v21.16b, v0.16b 1831 eor v2.16b, v26.16b, v2.16b 1832 eor v1.16b, v7.16b, v1.16b 1833 ushr v28.4s, v5.4s, #7 1834 shl v5.4s, v5.4s, #25 1835 ushr v30.4s, v0.4s, #7 1836 shl v0.4s, v0.4s, #25 1837 ushr v31.4s, v2.4s, #7 1838 shl v2.4s, 
v2.4s, #25 1839 ushr v8.4s, v1.4s, #7 1840 shl v1.4s, v1.4s, #25 1841 orr v5.16b, v5.16b, v28.16b 1842 ldr q28, [sp, #176] 1843 orr v0.16b, v0.16b, v30.16b 1844 orr v2.16b, v2.16b, v31.16b 1845 orr v1.16b, v1.16b, v8.16b 1846 add v23.4s, v23.4s, v28.4s 1847 add v22.4s, v22.4s, v11.4s 1848 add v17.4s, v17.4s, v15.4s 1849 add v20.4s, v20.4s, v3.4s 1850 ldr q3, [sp, #272] 1851 add v23.4s, v23.4s, v0.4s 1852 add v22.4s, v22.4s, v2.4s 1853 add v17.4s, v17.4s, v1.4s 1854 add v20.4s, v20.4s, v5.4s 1855 eor v4.16b, v4.16b, v23.16b 1856 eor v25.16b, v25.16b, v22.16b 1857 eor v16.16b, v16.16b, v17.16b 1858 eor v6.16b, v6.16b, v20.16b 1859 tbl v4.16b, { v4.16b }, v27.16b 1860 tbl v25.16b, { v25.16b }, v27.16b 1861 tbl v16.16b, { v16.16b }, v27.16b 1862 tbl v6.16b, { v6.16b }, v27.16b 1863 add v26.4s, v26.4s, v4.4s 1864 add v7.4s, v7.4s, v25.4s 1865 add v19.4s, v19.4s, v16.4s 1866 add v21.4s, v21.4s, v6.4s 1867 eor v0.16b, v26.16b, v0.16b 1868 eor v2.16b, v7.16b, v2.16b 1869 eor v1.16b, v19.16b, v1.16b 1870 eor v5.16b, v21.16b, v5.16b 1871 add v3.4s, v22.4s, v3.4s 1872 ldr q22, [sp, #160] 1873 ushr v28.4s, v0.4s, #12 1874 shl v0.4s, v0.4s, #20 1875 ushr v29.4s, v2.4s, #12 1876 shl v2.4s, v2.4s, #20 1877 ushr v30.4s, v1.4s, #12 1878 shl v1.4s, v1.4s, #20 1879 ushr v31.4s, v5.4s, #12 1880 shl v5.4s, v5.4s, #20 1881 add v17.4s, v17.4s, v22.4s 1882 ldr q22, [sp, #240] 1883 orr v0.16b, v0.16b, v28.16b 1884 prfm pldl1keep, [x23, #256] 1885 orr v2.16b, v2.16b, v29.16b 1886 prfm pldl1keep, [x24, #256] 1887 orr v1.16b, v1.16b, v30.16b 1888 prfm pldl1keep, [x22, #256] 1889 orr v5.16b, v5.16b, v31.16b 1890 prfm pldl1keep, [x25, #256] 1891 add v23.4s, v23.4s, v24.4s 1892 add v20.4s, v20.4s, v22.4s 1893 add v3.4s, v3.4s, v2.4s 1894 add v17.4s, v17.4s, v1.4s 1895 add v22.4s, v23.4s, v0.4s 1896 add v20.4s, v20.4s, v5.4s 1897 eor v23.16b, v25.16b, v3.16b 1898 eor v16.16b, v16.16b, v17.16b 1899 eor v4.16b, v4.16b, v22.16b 1900 eor v6.16b, v6.16b, v20.16b 1901 tbl v23.16b, { v23.16b }, v18.16b 
1902 tbl v16.16b, { v16.16b }, v18.16b 1903 tbl v4.16b, { v4.16b }, v18.16b 1904 tbl v6.16b, { v6.16b }, v18.16b 1905 add v7.4s, v7.4s, v23.4s 1906 add v19.4s, v19.4s, v16.4s 1907 add v18.4s, v26.4s, v4.4s 1908 add v21.4s, v21.4s, v6.4s 1909 eor v2.16b, v7.16b, v2.16b 1910 eor v1.16b, v19.16b, v1.16b 1911 eor v0.16b, v18.16b, v0.16b 1912 eor v5.16b, v21.16b, v5.16b 1913 ushr v25.4s, v2.4s, #7 1914 shl v2.4s, v2.4s, #25 1915 ushr v24.4s, v0.4s, #7 1916 shl v0.4s, v0.4s, #25 1917 ushr v26.4s, v1.4s, #7 1918 shl v1.4s, v1.4s, #25 1919 ushr v27.4s, v5.4s, #7 1920 shl v5.4s, v5.4s, #25 1921 orr v0.16b, v0.16b, v24.16b 1922 orr v2.16b, v2.16b, v25.16b 1923 orr v1.16b, v1.16b, v26.16b 1924 orr v5.16b, v5.16b, v27.16b 1925 movi v13.4s, #64 1926 eor v29.16b, v19.16b, v22.16b 1927 eor v8.16b, v21.16b, v3.16b 1928 eor v30.16b, v17.16b, v18.16b 1929 eor v31.16b, v20.16b, v7.16b 1930 eor v24.16b, v5.16b, v23.16b 1931 eor v18.16b, v0.16b, v16.16b 1932 eor v25.16b, v2.16b, v6.16b 1933 eor v26.16b, v1.16b, v4.16b 1934 cbnz x21, .LBB3_5 1935 b .LBB3_2 1936.LBB3_6: 1937 cbz x1, .LBB3_14 1938 adrp x12, .LCPI3_3 1939 ldr q0, [x11, :lo12:.LCPI3_1] 1940 orr w11, w7, w6 1941 ldr q2, [x10, :lo12:.LCPI3_2] 1942 ldr q1, [x12, :lo12:.LCPI3_3] 1943 and x12, x5, #0x1 1944.LBB3_8: 1945 movi v3.4s, #64 1946 lsr x13, x4, #32 1947 ldp q5, q4, [x3] 1948 mov x15, x2 1949 mov w14, w11 1950 mov v3.s[0], w4 1951 ldr x10, [x0] 1952 mov v3.s[1], w13 1953 b .LBB3_11 1954.LBB3_9: 1955 orr w14, w14, w9 1956.LBB3_10: 1957 ldp q6, q7, [x10] 1958 mov v16.16b, v3.16b 1959 and w14, w14, #0xff 1960 add v5.4s, v5.4s, v4.4s 1961 mov x15, x13 1962 mov v16.s[3], w14 1963 add x14, x10, #32 1964 uzp1 v17.4s, v6.4s, v7.4s 1965 add x10, x10, #64 1966 add v5.4s, v5.4s, v17.4s 1967 eor v16.16b, v5.16b, v16.16b 1968 tbl v16.16b, { v16.16b }, v0.16b 1969 add v18.4s, v16.4s, v1.4s 1970 eor v19.16b, v18.16b, v4.16b 1971 uzp2 v4.4s, v6.4s, v7.4s 1972 ushr v6.4s, v19.4s, #12 1973 shl v7.4s, v19.4s, #20 1974 ld2 { v19.4s, v20.4s 
}, [x14] 1975 add v5.4s, v5.4s, v4.4s 1976 mov w14, w6 1977 orr v6.16b, v7.16b, v6.16b 1978 add v5.4s, v5.4s, v6.4s 1979 eor v7.16b, v16.16b, v5.16b 1980 add v5.4s, v5.4s, v19.4s 1981 tbl v7.16b, { v7.16b }, v2.16b 1982 ext v5.16b, v5.16b, v5.16b, #12 1983 add v16.4s, v18.4s, v7.4s 1984 ext v7.16b, v7.16b, v7.16b, #8 1985 eor v6.16b, v6.16b, v16.16b 1986 ext v16.16b, v16.16b, v16.16b, #4 1987 ushr v18.4s, v6.4s, #7 1988 shl v6.4s, v6.4s, #25 1989 orr v6.16b, v6.16b, v18.16b 1990 ext v18.16b, v20.16b, v20.16b, #12 1991 add v5.4s, v5.4s, v6.4s 1992 eor v7.16b, v5.16b, v7.16b 1993 add v5.4s, v5.4s, v18.4s 1994 tbl v7.16b, { v7.16b }, v0.16b 1995 add v16.4s, v16.4s, v7.4s 1996 eor v6.16b, v6.16b, v16.16b 1997 ushr v21.4s, v6.4s, #12 1998 shl v6.4s, v6.4s, #20 1999 orr v6.16b, v6.16b, v21.16b 2000 uzp1 v21.4s, v17.4s, v17.4s 2001 add v5.4s, v5.4s, v6.4s 2002 ext v21.16b, v21.16b, v17.16b, #8 2003 eor v7.16b, v7.16b, v5.16b 2004 uzp2 v21.4s, v21.4s, v4.4s 2005 tbl v7.16b, { v7.16b }, v2.16b 2006 add v5.4s, v5.4s, v21.4s 2007 add v16.4s, v16.4s, v7.4s 2008 ext v5.16b, v5.16b, v5.16b, #4 2009 ext v7.16b, v7.16b, v7.16b, #8 2010 eor v6.16b, v6.16b, v16.16b 2011 ushr v22.4s, v6.4s, #7 2012 shl v6.4s, v6.4s, #25 2013 orr v6.16b, v6.16b, v22.16b 2014 add v22.4s, v5.4s, v6.4s 2015 eor v5.16b, v22.16b, v7.16b 2016 ext v7.16b, v16.16b, v16.16b, #12 2017 tbl v16.16b, { v5.16b }, v0.16b 2018 ext v5.16b, v17.16b, v17.16b, #12 2019 add v7.4s, v7.4s, v16.4s 2020 ext v5.16b, v17.16b, v5.16b, #12 2021 ext v17.16b, v19.16b, v19.16b, #12 2022 mov v19.16b, v18.16b 2023 eor v6.16b, v6.16b, v7.16b 2024 rev64 v5.4s, v5.4s 2025 mov v19.s[1], v17.s[2] 2026 ushr v20.4s, v6.4s, #12 2027 shl v6.4s, v6.4s, #20 2028 trn2 v5.4s, v5.4s, v19.4s 2029 orr v6.16b, v6.16b, v20.16b 2030 zip1 v20.2d, v18.2d, v4.2d 2031 zip2 v4.4s, v4.4s, v18.4s 2032 add v19.4s, v6.4s, v5.4s 2033 mov v20.s[3], v17.s[3] 2034 add v19.4s, v19.4s, v22.4s 2035 ext v22.16b, v20.16b, v20.16b, #12 2036 eor v16.16b, v16.16b, v19.16b 
2037 ext v19.16b, v19.16b, v19.16b, #12 2038 tbl v16.16b, { v16.16b }, v2.16b 2039 add v7.4s, v7.4s, v16.4s 2040 ext v16.16b, v16.16b, v16.16b, #8 2041 eor v6.16b, v6.16b, v7.16b 2042 ext v7.16b, v7.16b, v7.16b, #4 2043 ushr v23.4s, v6.4s, #7 2044 shl v24.4s, v6.4s, #25 2045 uzp1 v6.4s, v20.4s, v22.4s 2046 orr v20.16b, v24.16b, v23.16b 2047 add v22.4s, v20.4s, v6.4s 2048 add v19.4s, v22.4s, v19.4s 2049 eor v16.16b, v19.16b, v16.16b 2050 tbl v16.16b, { v16.16b }, v0.16b 2051 add v7.4s, v7.4s, v16.4s 2052 eor v18.16b, v20.16b, v7.16b 2053 zip1 v20.4s, v4.4s, v17.4s 2054 zip1 v4.4s, v17.4s, v4.4s 2055 ushr v17.4s, v18.4s, #12 2056 shl v18.4s, v18.4s, #20 2057 ext v20.16b, v4.16b, v20.16b, #8 2058 orr v4.16b, v18.16b, v17.16b 2059 ext v18.16b, v21.16b, v21.16b, #4 2060 add v17.4s, v4.4s, v20.4s 2061 add v17.4s, v17.4s, v19.4s 2062 uzp1 v19.4s, v18.4s, v18.4s 2063 eor v16.16b, v16.16b, v17.16b 2064 ext v19.16b, v19.16b, v18.16b, #8 2065 tbl v16.16b, { v16.16b }, v2.16b 2066 uzp2 v19.4s, v19.4s, v5.4s 2067 add v7.4s, v7.4s, v16.4s 2068 add v17.4s, v17.4s, v19.4s 2069 ext v16.16b, v16.16b, v16.16b, #8 2070 eor v4.16b, v4.16b, v7.16b 2071 ext v17.16b, v17.16b, v17.16b, #4 2072 ext v7.16b, v7.16b, v7.16b, #12 2073 ushr v21.4s, v4.4s, #7 2074 shl v4.4s, v4.4s, #25 2075 orr v4.16b, v4.16b, v21.16b 2076 ext v21.16b, v18.16b, v18.16b, #12 2077 add v17.4s, v17.4s, v4.4s 2078 ext v18.16b, v18.16b, v21.16b, #12 2079 mov v21.16b, v20.16b 2080 eor v16.16b, v17.16b, v16.16b 2081 rev64 v18.4s, v18.4s 2082 mov v21.s[1], v6.s[2] 2083 tbl v16.16b, { v16.16b }, v0.16b 2084 add v7.4s, v7.4s, v16.4s 2085 eor v4.16b, v4.16b, v7.16b 2086 ushr v22.4s, v4.4s, #12 2087 shl v23.4s, v4.4s, #20 2088 trn2 v4.4s, v18.4s, v21.4s 2089 orr v18.16b, v23.16b, v22.16b 2090 add v21.4s, v18.4s, v4.4s 2091 add v17.4s, v21.4s, v17.4s 2092 zip1 v21.2d, v20.2d, v5.2d 2093 zip2 v5.4s, v5.4s, v20.4s 2094 eor v16.16b, v16.16b, v17.16b 2095 mov v21.s[3], v6.s[3] 2096 ext v17.16b, v17.16b, v17.16b, #12 2097 zip1 
v20.4s, v5.4s, v6.4s 2098 tbl v16.16b, { v16.16b }, v2.16b 2099 zip1 v5.4s, v6.4s, v5.4s 2100 add v22.4s, v7.4s, v16.4s 2101 ext v16.16b, v16.16b, v16.16b, #8 2102 ext v20.16b, v5.16b, v20.16b, #8 2103 eor v7.16b, v18.16b, v22.16b 2104 ext v18.16b, v21.16b, v21.16b, #12 2105 ushr v23.4s, v7.4s, #7 2106 shl v24.4s, v7.4s, #25 2107 uzp1 v7.4s, v21.4s, v18.4s 2108 orr v18.16b, v24.16b, v23.16b 2109 add v21.4s, v18.4s, v7.4s 2110 add v17.4s, v21.4s, v17.4s 2111 ext v21.16b, v22.16b, v22.16b, #4 2112 eor v16.16b, v17.16b, v16.16b 2113 tbl v16.16b, { v16.16b }, v0.16b 2114 add v21.4s, v21.4s, v16.4s 2115 eor v18.16b, v18.16b, v21.16b 2116 ushr v6.4s, v18.4s, #12 2117 shl v18.4s, v18.4s, #20 2118 orr v5.16b, v18.16b, v6.16b 2119 add v6.4s, v5.4s, v20.4s 2120 add v6.4s, v6.4s, v17.4s 2121 ext v17.16b, v19.16b, v19.16b, #4 2122 eor v16.16b, v16.16b, v6.16b 2123 uzp1 v18.4s, v17.4s, v17.4s 2124 tbl v16.16b, { v16.16b }, v2.16b 2125 ext v18.16b, v18.16b, v17.16b, #8 2126 add v19.4s, v21.4s, v16.4s 2127 uzp2 v18.4s, v18.4s, v4.4s 2128 ext v16.16b, v16.16b, v16.16b, #8 2129 eor v5.16b, v5.16b, v19.16b 2130 add v6.4s, v6.4s, v18.4s 2131 ext v19.16b, v19.16b, v19.16b, #12 2132 ushr v21.4s, v5.4s, #7 2133 shl v5.4s, v5.4s, #25 2134 ext v6.16b, v6.16b, v6.16b, #4 2135 orr v5.16b, v5.16b, v21.16b 2136 ext v21.16b, v17.16b, v17.16b, #12 2137 add v6.4s, v6.4s, v5.4s 2138 ext v17.16b, v17.16b, v21.16b, #12 2139 mov v21.16b, v20.16b 2140 eor v16.16b, v6.16b, v16.16b 2141 rev64 v17.4s, v17.4s 2142 mov v21.s[1], v7.s[2] 2143 tbl v16.16b, { v16.16b }, v0.16b 2144 add v19.4s, v19.4s, v16.4s 2145 eor v5.16b, v5.16b, v19.16b 2146 ushr v22.4s, v5.4s, #12 2147 shl v23.4s, v5.4s, #20 2148 trn2 v5.4s, v17.4s, v21.4s 2149 orr v17.16b, v23.16b, v22.16b 2150 add v21.4s, v17.4s, v5.4s 2151 add v6.4s, v21.4s, v6.4s 2152 eor v16.16b, v16.16b, v6.16b 2153 ext v6.16b, v6.16b, v6.16b, #12 2154 tbl v21.16b, { v16.16b }, v2.16b 2155 zip1 v16.2d, v20.2d, v4.2d 2156 zip2 v4.4s, v4.4s, v20.4s 2157 add v19.4s, 
v19.4s, v21.4s 2158 mov v16.s[3], v7.s[3] 2159 ext v21.16b, v21.16b, v21.16b, #8 2160 zip1 v20.4s, v4.4s, v7.4s 2161 eor v17.16b, v17.16b, v19.16b 2162 ext v22.16b, v16.16b, v16.16b, #12 2163 ext v19.16b, v19.16b, v19.16b, #4 2164 zip1 v4.4s, v7.4s, v4.4s 2165 ushr v23.4s, v17.4s, #7 2166 shl v17.4s, v17.4s, #25 2167 uzp1 v16.4s, v16.4s, v22.4s 2168 ext v4.16b, v4.16b, v20.16b, #8 2169 orr v17.16b, v17.16b, v23.16b 2170 add v22.4s, v17.4s, v16.4s 2171 add v6.4s, v22.4s, v6.4s 2172 eor v21.16b, v6.16b, v21.16b 2173 tbl v21.16b, { v21.16b }, v0.16b 2174 add v19.4s, v19.4s, v21.4s 2175 eor v17.16b, v17.16b, v19.16b 2176 ushr v7.4s, v17.4s, #12 2177 shl v17.4s, v17.4s, #20 2178 orr v7.16b, v17.16b, v7.16b 2179 add v17.4s, v7.4s, v4.4s 2180 add v6.4s, v17.4s, v6.4s 2181 ext v17.16b, v18.16b, v18.16b, #4 2182 eor v18.16b, v21.16b, v6.16b 2183 uzp1 v20.4s, v17.4s, v17.4s 2184 tbl v18.16b, { v18.16b }, v2.16b 2185 ext v20.16b, v20.16b, v17.16b, #8 2186 add v19.4s, v19.4s, v18.4s 2187 uzp2 v20.4s, v20.4s, v5.4s 2188 ext v18.16b, v18.16b, v18.16b, #8 2189 eor v7.16b, v7.16b, v19.16b 2190 add v6.4s, v6.4s, v20.4s 2191 ushr v21.4s, v7.4s, #7 2192 shl v7.4s, v7.4s, #25 2193 ext v6.16b, v6.16b, v6.16b, #4 2194 orr v7.16b, v7.16b, v21.16b 2195 add v21.4s, v6.4s, v7.4s 2196 eor v6.16b, v21.16b, v18.16b 2197 ext v18.16b, v19.16b, v19.16b, #12 2198 tbl v19.16b, { v6.16b }, v0.16b 2199 ext v6.16b, v17.16b, v17.16b, #12 2200 add v18.4s, v18.4s, v19.4s 2201 ext v6.16b, v17.16b, v6.16b, #12 2202 mov v17.16b, v4.16b 2203 eor v7.16b, v7.16b, v18.16b 2204 rev64 v6.4s, v6.4s 2205 mov v17.s[1], v16.s[2] 2206 ushr v22.4s, v7.4s, #12 2207 shl v7.4s, v7.4s, #20 2208 trn2 v6.4s, v6.4s, v17.4s 2209 orr v7.16b, v7.16b, v22.16b 2210 add v17.4s, v7.4s, v6.4s 2211 add v17.4s, v17.4s, v21.4s 2212 zip1 v21.2d, v4.2d, v5.2d 2213 zip2 v4.4s, v5.4s, v4.4s 2214 eor v19.16b, v19.16b, v17.16b 2215 mov v21.s[3], v16.s[3] 2216 ext v17.16b, v17.16b, v17.16b, #12 2217 tbl v19.16b, { v19.16b }, v2.16b 2218 ext 
v22.16b, v21.16b, v21.16b, #12 2219 add v18.4s, v18.4s, v19.4s 2220 ext v19.16b, v19.16b, v19.16b, #8 2221 eor v7.16b, v7.16b, v18.16b 2222 ext v18.16b, v18.16b, v18.16b, #4 2223 ushr v23.4s, v7.4s, #7 2224 shl v24.4s, v7.4s, #25 2225 uzp1 v7.4s, v21.4s, v22.4s 2226 orr v21.16b, v24.16b, v23.16b 2227 add v22.4s, v21.4s, v7.4s 2228 add v17.4s, v22.4s, v17.4s 2229 eor v19.16b, v17.16b, v19.16b 2230 tbl v19.16b, { v19.16b }, v0.16b 2231 add v18.4s, v18.4s, v19.4s 2232 eor v5.16b, v21.16b, v18.16b 2233 zip1 v21.4s, v4.4s, v16.4s 2234 zip1 v4.4s, v16.4s, v4.4s 2235 ushr v16.4s, v5.4s, #12 2236 shl v5.4s, v5.4s, #20 2237 ext v21.16b, v4.16b, v21.16b, #8 2238 orr v4.16b, v5.16b, v16.16b 2239 ext v16.16b, v20.16b, v20.16b, #4 2240 mov v23.16b, v21.16b 2241 add v5.4s, v4.4s, v21.4s 2242 mov v23.s[1], v7.s[2] 2243 add v5.4s, v5.4s, v17.4s 2244 eor v17.16b, v19.16b, v5.16b 2245 uzp1 v19.4s, v16.4s, v16.4s 2246 tbl v17.16b, { v17.16b }, v2.16b 2247 ext v19.16b, v19.16b, v16.16b, #8 2248 add v18.4s, v18.4s, v17.4s 2249 uzp2 v19.4s, v19.4s, v6.4s 2250 eor v4.16b, v4.16b, v18.16b 2251 add v5.4s, v5.4s, v19.4s 2252 ext v19.16b, v19.16b, v19.16b, #4 2253 ushr v20.4s, v4.4s, #7 2254 shl v4.4s, v4.4s, #25 2255 ext v5.16b, v5.16b, v5.16b, #4 2256 orr v20.16b, v4.16b, v20.16b 2257 ext v4.16b, v17.16b, v17.16b, #8 2258 add v17.4s, v5.4s, v20.4s 2259 ext v5.16b, v18.16b, v18.16b, #12 2260 eor v4.16b, v17.16b, v4.16b 2261 tbl v18.16b, { v4.16b }, v0.16b 2262 ext v4.16b, v16.16b, v16.16b, #12 2263 add v22.4s, v5.4s, v18.4s 2264 ext v4.16b, v16.16b, v4.16b, #12 2265 eor v5.16b, v20.16b, v22.16b 2266 rev64 v16.4s, v4.4s 2267 ushr v20.4s, v5.4s, #12 2268 shl v24.4s, v5.4s, #20 2269 trn2 v5.4s, v16.4s, v23.4s 2270 orr v16.16b, v24.16b, v20.16b 2271 add v20.4s, v16.4s, v5.4s 2272 add v17.4s, v20.4s, v17.4s 2273 zip1 v20.2d, v21.2d, v6.2d 2274 zip2 v6.4s, v6.4s, v21.4s 2275 eor v18.16b, v18.16b, v17.16b 2276 mov v20.s[3], v7.s[3] 2277 ext v17.16b, v17.16b, v17.16b, #12 2278 zip1 v21.4s, v6.4s, 
v7.4s 2279 tbl v18.16b, { v18.16b }, v2.16b 2280 ext v24.16b, v20.16b, v20.16b, #12 2281 zip1 v6.4s, v7.4s, v6.4s 2282 add v22.4s, v22.4s, v18.4s 2283 ext v18.16b, v18.16b, v18.16b, #8 2284 ext v6.16b, v6.16b, v21.16b, #8 2285 eor v16.16b, v16.16b, v22.16b 2286 ext v22.16b, v22.16b, v22.16b, #4 2287 zip1 v5.2d, v6.2d, v5.2d 2288 zip2 v4.4s, v4.4s, v6.4s 2289 ushr v25.4s, v16.4s, #7 2290 shl v26.4s, v16.4s, #25 2291 uzp1 v16.4s, v20.4s, v24.4s 2292 orr v20.16b, v26.16b, v25.16b 2293 mov v5.s[3], v16.s[3] 2294 add v24.4s, v20.4s, v16.4s 2295 add v17.4s, v24.4s, v17.4s 2296 eor v18.16b, v17.16b, v18.16b 2297 tbl v18.16b, { v18.16b }, v0.16b 2298 add v22.4s, v22.4s, v18.4s 2299 eor v20.16b, v20.16b, v22.16b 2300 ushr v7.4s, v20.4s, #12 2301 shl v20.4s, v20.4s, #20 2302 orr v7.16b, v20.16b, v7.16b 2303 add v20.4s, v7.4s, v6.4s 2304 add v17.4s, v20.4s, v17.4s 2305 ext v20.16b, v19.16b, v19.16b, #8 2306 eor v18.16b, v18.16b, v17.16b 2307 ext v17.16b, v17.16b, v17.16b, #4 2308 tbl v18.16b, { v18.16b }, v2.16b 2309 add v21.4s, v22.4s, v18.4s 2310 uzp2 v22.4s, v20.4s, v23.4s 2311 ext v18.16b, v18.16b, v18.16b, #8 2312 eor v7.16b, v7.16b, v21.16b 2313 ext v20.16b, v22.16b, v20.16b, #4 2314 ushr v22.4s, v7.4s, #7 2315 shl v7.4s, v7.4s, #25 2316 add v17.4s, v17.4s, v20.4s 2317 ext v20.16b, v21.16b, v21.16b, #12 2318 ext v21.16b, v19.16b, v19.16b, #12 2319 orr v7.16b, v7.16b, v22.16b 2320 ext v19.16b, v19.16b, v21.16b, #12 2321 add v17.4s, v17.4s, v7.4s 2322 mov v21.16b, v6.16b 2323 rev64 v19.4s, v19.4s 2324 eor v18.16b, v17.16b, v18.16b 2325 mov v21.s[1], v16.s[2] 2326 tbl v18.16b, { v18.16b }, v0.16b 2327 trn2 v19.4s, v19.4s, v21.4s 2328 add v20.4s, v20.4s, v18.4s 2329 eor v7.16b, v7.16b, v20.16b 2330 ushr v22.4s, v7.4s, #12 2331 shl v7.4s, v7.4s, #20 2332 orr v7.16b, v7.16b, v22.16b 2333 add v19.4s, v7.4s, v19.4s 2334 add v17.4s, v19.4s, v17.4s 2335 eor v18.16b, v18.16b, v17.16b 2336 ext v17.16b, v17.16b, v17.16b, #12 2337 tbl v18.16b, { v18.16b }, v2.16b 2338 add v19.4s, 
v20.4s, v18.4s 2339 ext v20.16b, v5.16b, v5.16b, #12 2340 ext v18.16b, v18.16b, v18.16b, #8 2341 eor v7.16b, v7.16b, v19.16b 2342 uzp1 v5.4s, v5.4s, v20.4s 2343 ushr v21.4s, v7.4s, #7 2344 shl v7.4s, v7.4s, #25 2345 orr v7.16b, v7.16b, v21.16b 2346 add v5.4s, v7.4s, v5.4s 2347 add v5.4s, v5.4s, v17.4s 2348 eor v17.16b, v5.16b, v18.16b 2349 ext v18.16b, v19.16b, v19.16b, #4 2350 tbl v17.16b, { v17.16b }, v0.16b 2351 add v18.4s, v18.4s, v17.4s 2352 eor v6.16b, v7.16b, v18.16b 2353 zip1 v7.4s, v4.4s, v16.4s 2354 zip1 v4.4s, v16.4s, v4.4s 2355 ushr v16.4s, v6.4s, #12 2356 shl v6.4s, v6.4s, #20 2357 ext v4.16b, v4.16b, v7.16b, #8 2358 orr v6.16b, v6.16b, v16.16b 2359 add v4.4s, v6.4s, v4.4s 2360 add v4.4s, v4.4s, v5.4s 2361 eor v5.16b, v17.16b, v4.16b 2362 ext v4.16b, v4.16b, v4.16b, #4 2363 tbl v5.16b, { v5.16b }, v2.16b 2364 add v7.4s, v18.4s, v5.4s 2365 eor v6.16b, v6.16b, v7.16b 2366 ext v7.16b, v7.16b, v7.16b, #12 2367 ushr v16.4s, v6.4s, #7 2368 shl v6.4s, v6.4s, #25 2369 orr v6.16b, v6.16b, v16.16b 2370 ext v16.16b, v5.16b, v5.16b, #8 2371 eor v5.16b, v4.16b, v7.16b 2372 eor v4.16b, v6.16b, v16.16b 2373.LBB3_11: 2374 subs x13, x15, #1 2375 b.eq .LBB3_9 2376 cbnz x15, .LBB3_10 2377 add x4, x4, x12 2378 add x0, x0, #8 2379 subs x1, x1, #1 2380 stp q5, q4, [x8], #32 2381 b.ne .LBB3_8 2382.LBB3_14: 2383 add sp, sp, #368 2384 ldp x20, x19, [sp, #128] 2385 ldp x22, x21, [sp, #112] 2386 ldp x24, x23, [sp, #96] 2387 ldp x26, x25, [sp, #80] 2388 ldp x29, x27, [sp, #64] 2389 ldp d9, d8, [sp, #48] 2390 ldp d11, d10, [sp, #32] 2391 ldp d13, d12, [sp, #16] 2392 ldp d15, d14, [sp], #144 2393 ret 2394.Lfunc_end3: 2395 .size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41 2396 .cfi_endproc 2397 .section ".note.GNU-stack","",@progbits 2398#endif