/**************************************************************
  Copyright (c) 2019 Huawei Technologies Co., Ltd.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Huawei Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

/*-----------------------------------------------------------------------
 * int gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
 *                       unsigned char *src, unsigned char **dest);
 *
 * GF(2^8) multiply-accumulate ("mad") of one source buffer into FIVE
 * destination buffers, using NEON TBL nibble lookups: each byte is split
 * into low/high 4-bit halves, each half indexes a 16-byte lookup table,
 * and the two results are XORed into the destination.
 *
 * ABI:   AAPCS64
 * In:    x0 = len    (bytes to process; must be >= 16 or the call fails)
 *        x1 = vec    (number of source vectors; scales the table stride)
 *        x2 = vec_i  (index of this source vector within gftbls)
 *        x3 = gftbls (tables, 32 bytes per dest per vec: 16 low + 16 high)
 *        x4 = src    (source buffer)
 *        x5 = dest   (array of 5 destination pointers)
 * Out:   w0 = 0 on success, 1 on failure (len < 16)
 * Clobb: x0-x17, v0-v7, v16-v31, flags.  d8-d15 (callee-saved low halves)
 *        are used only by the 64-byte main loop and are saved/restored
 *        around it on the stack (16-byte aligned, 64-byte frame).
 *---------------------------------------------------------------------*/
.text

.global gf_5vect_mad_neon
.type gf_5vect_mad_neon, %function


/* arguments */
x_len .req x0
x_vec .req x1
x_vec_i .req x2
x_tbl .req x3
x_src .req x4
x_dest .req x5

/* returns */
w_ret .req w0

/* local variables */
x_src_end .req x6
x_dest1 .req x7
x_dest2 .req x8
x_dest3 .req x9
x_dest4 .req x10
x_dest5 .req x_dest		/* x5 is reused once all dest pointers are loaded */
x_tmp .req x11
x_tbl1 .req x12
x_tbl2 .req x13
x_tbl3 .req x14
x_tbl4 .req x15
x_tbl5 .req x16
x_const .req x17

/* vectors */
v_mask0f .req v0
v_tmp_lo .req v1
v_tmp_hi .req v2
v_tmp .req v3
q_tmp .req q3

v_gft1_lo .req v4
v_gft1_hi .req v5
v_gft2_lo .req v6
v_gft2_hi .req v7
v_gft3_lo .req v16
v_gft3_hi .req v17
q_gft1_lo .req q4
q_gft1_hi .req q5
q_gft2_lo .req q6
q_gft2_hi .req q7
q_gft3_lo .req q16
q_gft3_hi .req q17

v_gft4_lo .req v18
v_gft4_hi .req v19
q_gft4_lo .req q18
q_gft4_hi .req q19
/* tables 2 and 5 share registers; each is reloaded when needed */
v_gft5_lo .req v_gft2_lo
v_gft5_hi .req v_gft2_hi
q_gft5_lo .req q_gft2_lo
q_gft5_hi .req q_gft2_hi

v_data_0 .req v8
v_data_1 .req v9
v_data_2 .req v10
v_data_3 .req v11
q_data_0 .req q8
q_data_1 .req q9
q_data_2 .req q10
q_data_3 .req q11

v_data_0_lo .req v12
v_data_1_lo .req v13
v_data_2_lo .req v14
v_data_3_lo .req v15
/* the hi-nibble shift overwrites the raw data in place */
v_data_0_hi .req v_data_0
v_data_1_hi .req v_data_1
v_data_2_hi .req v_data_2
v_data_3_hi .req v_data_3

v_d1_0 .req v20
v_d1_1 .req v21
v_d1_2 .req v22
v_d1_3 .req v23
v_d2_0 .req v24
v_d2_1 .req v25
v_d2_2 .req v26
v_d2_3 .req v27
v_d3_0 .req v28
v_d3_1 .req v29
v_d3_2 .req v30
v_d3_3 .req v31
q_d1_0 .req q20
q_d1_1 .req q21
q_d1_2 .req q22
q_d1_3 .req q23
q_d2_0 .req q24
q_d2_1 .req q25
q_d2_2 .req q26
q_d2_3 .req q27
q_d3_0 .req q28
q_d3_1 .req q29
q_d3_2 .req q30
q_d3_3 .req q31

/* dests 4/5 reuse the registers of dests 1/2, which have been
 * stored back to memory by the time these are loaded */
v_d4_0 .req v_d1_0
v_d4_1 .req v_d1_1
v_d4_2 .req v_d1_2
v_d4_3 .req v_d1_3
q_d4_0 .req q_d1_0
q_d4_1 .req q_d1_1
q_d4_2 .req q_d1_2
q_d4_3 .req q_d1_3
v_d5_0 .req v_d2_0
v_d5_1 .req v_d2_1
v_d5_2 .req v_d2_2
v_d5_3 .req v_d2_3
q_d5_0 .req q_d2_0
q_d5_1 .req q_d2_1
q_d5_2 .req q_d2_2
q_d5_3 .req q_d2_3

/* 16-byte tail path scratch (overlaps v21-v23; safe outside .Lloop64) */
v_data .req v21
q_data .req q21
v_data_lo .req v22
v_data_hi .req v23

gf_5vect_mad_neon:
	/* less than 16 bytes, return_fail */
	cmp x_len, #16
	blt .Lreturn_fail

	movi v_mask0f.16b, #0x0f
	/* each (dest, vec) table entry is 32 bytes: stride = vec << 5 */
	lsl x_vec_i, x_vec_i, #5
	lsl x_vec, x_vec, #5
	add x_tbl1, x_tbl, x_vec_i
	add x_tbl2, x_tbl1, x_vec
	add x_tbl3, x_tbl2, x_vec
	add x_tbl4, x_tbl3, x_vec
	add x_tbl5, x_tbl4, x_vec
	add x_src_end, x_src, x_len
	/* x_dest5 aliases x_dest (x5): must be loaded last */
	ldr x_dest1, [x_dest, #8*0]
	ldr x_dest2, [x_dest, #8*1]
	ldr x_dest3, [x_dest, #8*2]
	ldr x_dest4, [x_dest, #8*3]
	ldr x_dest5, [x_dest, #8*4]
	/* tables 1, 3, 4 stay resident; 2 and 5 share regs and are reloaded */
	ldr q_gft1_lo, [x_tbl1]
	ldr q_gft1_hi, [x_tbl1, #16]
	ldr q_gft3_lo, [x_tbl3]
	ldr q_gft3_hi, [x_tbl3, #16]
	ldr q_gft4_lo, [x_tbl4]
	ldr q_gft4_hi, [x_tbl4, #16]

.Lloop64_init:
	/* less than 64 bytes, goto Lloop16_init */
	cmp x_len, #64
	blt .Lloop16_init

	/* save d8 ~ d15 to stack (AAPCS64 callee-saved low halves) */
	sub sp, sp, #64
	stp d8, d9, [sp]
	stp d10, d11, [sp, #16]
	stp d12, d13, [sp, #32]
	stp d14, d15, [sp, #48]

	/* stop 64 bytes early so a full 64-byte block is always available */
	sub x_src_end, x_src_end, #64

/* main loop: 64 bytes (4 x 16) per iteration into all 5 dests */
.Lloop64:
	ldr q_data_0, [x_src, #16*0]
	ldr q_data_1, [x_src, #16*1]
	ldr q_data_2, [x_src, #16*2]
	ldr q_data_3, [x_src, #16*3]
	add x_src, x_src, #64

	ldr q_d1_0, [x_dest1, #16*0]
	ldr q_d1_1, [x_dest1, #16*1]
	ldr q_d1_2, [x_dest1, #16*2]
	ldr q_d1_3, [x_dest1, #16*3]

	ldr q_d2_0, [x_dest2, #16*0]
	ldr q_d2_1, [x_dest2, #16*1]
	ldr q_d2_2, [x_dest2, #16*2]
	ldr q_d2_3, [x_dest2, #16*3]

	ldr q_d3_0, [x_dest3, #16*0]
	ldr q_d3_1, [x_dest3, #16*1]
	ldr q_d3_2, [x_dest3, #16*2]
	ldr q_d3_3, [x_dest3, #16*3]

	ldr q_gft2_lo, [x_tbl2]
	ldr q_gft2_hi, [x_tbl2, #16]

	/* split every data byte into low and high nibbles */
	and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
	and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
	and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
	and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b

	ushr v_data_0_hi.16b, v_data_0.16b, #4
	ushr v_data_1_hi.16b, v_data_1.16b, #4
	ushr v_data_2_hi.16b, v_data_2.16b, #4
	ushr v_data_3_hi.16b, v_data_3.16b, #4

	/* dest1: d1 ^= tbl_lo[lo nibble] ^ tbl_hi[hi nibble] */
	tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
	tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
	eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
	eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
	tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
	eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b
	eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
	tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
	eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b
	eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
	tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
	eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b
	eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b

	/* dest2 */
	tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
	tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
	eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
	eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
	tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
	eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b
	eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
	tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
	eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b
	eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
	tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
	eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b
	eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b

	/* dest3 */
	tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b
	tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b
	eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
	eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b
	tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b
	eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b
	eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b
	tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b
	eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b
	eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b
	tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b
	eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b
	eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b

	/* store dests 1-3, freeing their registers for dests 4-5 */
	str q_d1_0, [x_dest1, #16*0]
	str q_d1_1, [x_dest1, #16*1]
	str q_d1_2, [x_dest1, #16*2]
	str q_d1_3, [x_dest1, #16*3]
	add x_dest1, x_dest1, #64

	str q_d2_0, [x_dest2, #16*0]
	str q_d2_1, [x_dest2, #16*1]
	str q_d2_2, [x_dest2, #16*2]
	str q_d2_3, [x_dest2, #16*3]
	add x_dest2, x_dest2, #64

	str q_d3_0, [x_dest3, #16*0]
	str q_d3_1, [x_dest3, #16*1]
	str q_d3_2, [x_dest3, #16*2]
	str q_d3_3, [x_dest3, #16*3]
	add x_dest3, x_dest3, #64

	ldr q_d4_0, [x_dest4, #16*0]
	ldr q_d4_1, [x_dest4, #16*1]
	ldr q_d4_2, [x_dest4, #16*2]
	ldr q_d4_3, [x_dest4, #16*3]

	ldr q_d5_0, [x_dest5, #16*0]
	ldr q_d5_1, [x_dest5, #16*1]
	ldr q_d5_2, [x_dest5, #16*2]
	ldr q_d5_3, [x_dest5, #16*3]

	/* table 5 reuses table 2's registers */
	ldr q_gft5_lo, [x_tbl5]
	ldr q_gft5_hi, [x_tbl5, #16]

	/* dest4 */
	tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b
	tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b
	eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
	eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b
	tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b
	eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b
	eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b
	tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b
	eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b
	eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b
	tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b
	eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b
	eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b

	/* dest5 */
	tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_0_lo.16b
	tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_0_hi.16b
	eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
	eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_1_lo.16b
	tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_1_hi.16b
	eor v_d5_1.16b, v_tmp_lo.16b, v_d5_1.16b
	eor v_d5_1.16b, v_d5_1.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_2_lo.16b
	tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_2_hi.16b
	eor v_d5_2.16b, v_tmp_lo.16b, v_d5_2.16b
	eor v_d5_2.16b, v_d5_2.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_3_lo.16b
	tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_3_hi.16b
	eor v_d5_3.16b, v_tmp_lo.16b, v_d5_3.16b
	eor v_d5_3.16b, v_d5_3.16b, v_tmp_hi.16b

	str q_d4_0, [x_dest4, #16*0]
	str q_d4_1, [x_dest4, #16*1]
	str q_d4_2, [x_dest4, #16*2]
	str q_d4_3, [x_dest4, #16*3]
	add x_dest4, x_dest4, #64

	str q_d5_0, [x_dest5, #16*0]
	str q_d5_1, [x_dest5, #16*1]
	str q_d5_2, [x_dest5, #16*2]
	str q_d5_3, [x_dest5, #16*3]
	add x_dest5, x_dest5, #64

	cmp x_src, x_src_end
	bls .Lloop64

.Lloop64_end:
	/* restore d8 ~ d15 */
	ldp d8, d9, [sp]
	ldp d10, d11, [sp, #16]
	ldp d12, d13, [sp, #32]
	ldp d14, d15, [sp, #48]
	add sp, sp, #64
	/* undo the 64-byte early-stop adjustment */
	add x_src_end, x_src_end, #64

.Lloop16_init:
	/* stop 16 bytes early; the remainder (< 16 B) is handled below */
	sub x_src_end, x_src_end, #16
	cmp x_src, x_src_end
	bhi .Llessthan16_init

/* 16 bytes per iteration into all 5 dests */
.Lloop16:
	ldr q_data, [x_src]

	ldr q_d1_0, [x_dest1]
	ldr q_d2_0, [x_dest2]
	ldr q_d3_0, [x_dest3]
	ldr q_gft2_lo, [x_tbl2]
	ldr q_gft2_hi, [x_tbl2, #16]

	and v_data_lo.16b, v_data.16b, v_mask0f.16b
	ushr v_data_hi.16b, v_data.16b, #4

	tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
	eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b
	eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
	eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b
	eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
	eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b
	eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b

	str q_d1_0, [x_dest1]
	str q_d2_0, [x_dest2]
	str q_d3_0, [x_dest3]

	ldr q_d4_0, [x_dest4]
	ldr q_d5_0, [x_dest5]
	ldr q_gft5_lo, [x_tbl5]
	ldr q_gft5_hi, [x_tbl5, #16]

	tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
	eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b
	eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
	eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b
	eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b

	str q_d4_0, [x_dest4]
	str q_d5_0, [x_dest5]

	add x_src, x_src, #16
	add x_dest1, x_dest1, #16
	add x_dest2, x_dest2, #16
	add x_dest3, x_dest3, #16
	add x_dest4, x_dest4, #16
	add x_dest5, x_dest5, #16
	cmp x_src, x_src_end
	bls .Lloop16

.Llessthan16_init:
	/* x_tmp = bytes already processed past the last full-16 boundary;
	 * x_tmp == 16 means everything was consumed by .Lloop16 */
	sub x_tmp, x_src, x_src_end
	cmp x_tmp, #16
	beq .Lreturn_pass

/* tail (< 16 bytes): redo a full 16-byte window ending exactly at the
 * buffer end, masking out the first x_tmp bytes that were already done */
.Llessthan16:
	mov x_src, x_src_end
	sub x_dest1, x_dest1, x_tmp
	sub x_dest2, x_dest2, x_tmp
	sub x_dest3, x_dest3, x_tmp
	sub x_dest4, x_dest4, x_tmp
	sub x_dest5, x_dest5, x_tmp

	/* PC-relative, PIC-safe address of const_tbl (was: ldr =const_tbl,
	 * whose .text literal pool needs an absolute relocation) */
	adrp x_const, const_tbl
	add x_const, x_const, :lo12:const_tbl
	sub x_const, x_const, x_tmp
	/* sliding window over {16 x 0x00, 16 x 0xff}: first x_tmp lanes get
	 * 0x00 (no-op on already-processed bytes), the rest get 0xff */
	ldr q_tmp, [x_const, #16]

	ldr q_data, [x_src]
	ldr q_d1_0, [x_dest1]
	ldr q_d2_0, [x_dest2]
	ldr q_d3_0, [x_dest3]
	ldr q_gft2_lo, [x_tbl2]
	ldr q_gft2_hi, [x_tbl2, #16]

	and v_data_lo.16b, v_data.16b, v_mask0f.16b
	ushr v_data_hi.16b, v_data.16b, #4

	tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
	eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
	and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
	eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
	eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
	and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
	eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
	eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
	and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
	eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b

	str q_d1_0, [x_dest1]
	str q_d2_0, [x_dest2]
	str q_d3_0, [x_dest3]

	ldr q_d4_0, [x_dest4]
	ldr q_d5_0, [x_dest5]
	ldr q_gft5_lo, [x_tbl5]
	ldr q_gft5_hi, [x_tbl5, #16]

	tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b
	eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
	and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
	eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b

	tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b
	tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b
	eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
	and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
	eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b

	str q_d4_0, [x_dest4]
	str q_d5_0, [x_dest5]

.Lreturn_pass:
	mov w_ret, #0
	ret

.Lreturn_fail:
	mov w_ret, #1
	ret

.size gf_5vect_mad_neon, .-gf_5vect_mad_neon

/* read-only mask table: .rodata, not writable .data */
.section .rodata
.balign 8
const_tbl:
	.dword 0x0000000000000000, 0x0000000000000000
	.dword 0xffffffffffffffff, 0xffffffffffffffff