1/* 2 * ARM translation: AArch32 Neon instructions 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2005-2007 CodeSourcery 6 * Copyright (c) 2007 OpenedHand, Ltd. 7 * Copyright (c) 2020 Linaro, Ltd. 8 * 9 * This library is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * This library is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 21 */ 22 23/* 24 * This file is intended to be included from translate.c; it uses 25 * some macros and definitions provided by that file. 26 * It might be possible to convert it to a standalone .c file eventually. 
27 */ 28 29static inline int plus1(DisasContext *s, int x) 30{ 31 return x + 1; 32} 33 34static inline int rsub_64(DisasContext *s, int x) 35{ 36 return 64 - x; 37} 38 39static inline int rsub_32(DisasContext *s, int x) 40{ 41 return 32 - x; 42} 43static inline int rsub_16(DisasContext *s, int x) 44{ 45 return 16 - x; 46} 47static inline int rsub_8(DisasContext *s, int x) 48{ 49 return 8 - x; 50} 51 52static inline int neon_3same_fp_size(DisasContext *s, int x) 53{ 54 /* Convert 0==fp32, 1==fp16 into a MO_* value */ 55 return MO_32 - x; 56} 57 58/* Include the generated Neon decoder */ 59#include "decode-neon-dp.c.inc" 60#include "decode-neon-ls.c.inc" 61#include "decode-neon-shared.c.inc" 62 63static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop) 64{ 65 long offset = neon_element_offset(reg, ele, mop & MO_SIZE); 66 67 switch (mop) { 68 case MO_UB: 69 tcg_gen_ld8u_i32(var, cpu_env, offset); 70 break; 71 case MO_UW: 72 tcg_gen_ld16u_i32(var, cpu_env, offset); 73 break; 74 case MO_UL: 75 tcg_gen_ld_i32(var, cpu_env, offset); 76 break; 77 default: 78 g_assert_not_reached(); 79 } 80} 81 82static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop) 83{ 84 long offset = neon_element_offset(reg, ele, mop & MO_SIZE); 85 86 switch (mop) { 87 case MO_UB: 88 tcg_gen_ld8u_i64(var, cpu_env, offset); 89 break; 90 case MO_UW: 91 tcg_gen_ld16u_i64(var, cpu_env, offset); 92 break; 93 case MO_UL: 94 tcg_gen_ld32u_i64(var, cpu_env, offset); 95 break; 96 case MO_Q: 97 tcg_gen_ld_i64(var, cpu_env, offset); 98 break; 99 default: 100 g_assert_not_reached(); 101 } 102} 103 104static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var) 105{ 106 long offset = neon_element_offset(reg, ele, size); 107 108 switch (size) { 109 case MO_8: 110 tcg_gen_st8_i32(var, cpu_env, offset); 111 break; 112 case MO_16: 113 tcg_gen_st16_i32(var, cpu_env, offset); 114 break; 115 case MO_32: 116 tcg_gen_st_i32(var, cpu_env, offset); 117 break; 118 default: 119 
g_assert_not_reached(); 120 } 121} 122 123static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var) 124{ 125 long offset = neon_element_offset(reg, ele, size); 126 127 switch (size) { 128 case MO_8: 129 tcg_gen_st8_i64(var, cpu_env, offset); 130 break; 131 case MO_16: 132 tcg_gen_st16_i64(var, cpu_env, offset); 133 break; 134 case MO_32: 135 tcg_gen_st32_i64(var, cpu_env, offset); 136 break; 137 case MO_64: 138 tcg_gen_st_i64(var, cpu_env, offset); 139 break; 140 default: 141 g_assert_not_reached(); 142 } 143} 144 145static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) 146{ 147 int opr_sz; 148 TCGv_ptr fpst; 149 gen_helper_gvec_3_ptr *fn_gvec_ptr; 150 151 if (!dc_isar_feature(aa32_vcma, s) 152 || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) { 153 return false; 154 } 155 156 /* UNDEF accesses to D16-D31 if they don't exist. */ 157 if (!dc_isar_feature(aa32_simd_r32, s) && 158 ((a->vd | a->vn | a->vm) & 0x10)) { 159 return false; 160 } 161 162 if ((a->vn | a->vm | a->vd) & a->q) { 163 return false; 164 } 165 166 if (!vfp_access_check(s)) { 167 return true; 168 } 169 170 opr_sz = (1 + a->q) * 8; 171 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 172 fn_gvec_ptr = (a->size == MO_16) ? 173 gen_helper_gvec_fcmlah : gen_helper_gvec_fcmlas; 174 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 175 vfp_reg_offset(1, a->vn), 176 vfp_reg_offset(1, a->vm), 177 fpst, opr_sz, opr_sz, a->rot, 178 fn_gvec_ptr); 179 tcg_temp_free_ptr(fpst); 180 return true; 181} 182 183static bool trans_VCADD(DisasContext *s, arg_VCADD *a) 184{ 185 int opr_sz; 186 TCGv_ptr fpst; 187 gen_helper_gvec_3_ptr *fn_gvec_ptr; 188 189 if (!dc_isar_feature(aa32_vcma, s) 190 || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) { 191 return false; 192 } 193 194 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 195 if (!dc_isar_feature(aa32_simd_r32, s) && 196 ((a->vd | a->vn | a->vm) & 0x10)) { 197 return false; 198 } 199 200 if ((a->vn | a->vm | a->vd) & a->q) { 201 return false; 202 } 203 204 if (!vfp_access_check(s)) { 205 return true; 206 } 207 208 opr_sz = (1 + a->q) * 8; 209 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 210 fn_gvec_ptr = (a->size == MO_16) ? 211 gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds; 212 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 213 vfp_reg_offset(1, a->vn), 214 vfp_reg_offset(1, a->vm), 215 fpst, opr_sz, opr_sz, a->rot, 216 fn_gvec_ptr); 217 tcg_temp_free_ptr(fpst); 218 return true; 219} 220 221static bool trans_VDOT(DisasContext *s, arg_VDOT *a) 222{ 223 int opr_sz; 224 gen_helper_gvec_3 *fn_gvec; 225 226 if (!dc_isar_feature(aa32_dp, s)) { 227 return false; 228 } 229 230 /* UNDEF accesses to D16-D31 if they don't exist. */ 231 if (!dc_isar_feature(aa32_simd_r32, s) && 232 ((a->vd | a->vn | a->vm) & 0x10)) { 233 return false; 234 } 235 236 if ((a->vn | a->vm | a->vd) & a->q) { 237 return false; 238 } 239 240 if (!vfp_access_check(s)) { 241 return true; 242 } 243 244 opr_sz = (1 + a->q) * 8; 245 fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; 246 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd), 247 vfp_reg_offset(1, a->vn), 248 vfp_reg_offset(1, a->vm), 249 opr_sz, opr_sz, 0, fn_gvec); 250 return true; 251} 252 253static bool trans_VFML(DisasContext *s, arg_VFML *a) 254{ 255 int opr_sz; 256 257 if (!dc_isar_feature(aa32_fhm, s)) { 258 return false; 259 } 260 261 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 262 if (!dc_isar_feature(aa32_simd_r32, s) && 263 (a->vd & 0x10)) { 264 return false; 265 } 266 267 if (a->vd & a->q) { 268 return false; 269 } 270 271 if (!vfp_access_check(s)) { 272 return true; 273 } 274 275 opr_sz = (1 + a->q) * 8; 276 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 277 vfp_reg_offset(a->q, a->vn), 278 vfp_reg_offset(a->q, a->vm), 279 cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ 280 gen_helper_gvec_fmlal_a32); 281 return true; 282} 283 284static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) 285{ 286 gen_helper_gvec_3_ptr *fn_gvec_ptr; 287 int opr_sz; 288 TCGv_ptr fpst; 289 290 if (!dc_isar_feature(aa32_vcma, s)) { 291 return false; 292 } 293 if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { 294 return false; 295 } 296 297 /* UNDEF accesses to D16-D31 if they don't exist. */ 298 if (!dc_isar_feature(aa32_simd_r32, s) && 299 ((a->vd | a->vn | a->vm) & 0x10)) { 300 return false; 301 } 302 303 if ((a->vd | a->vn) & a->q) { 304 return false; 305 } 306 307 if (!vfp_access_check(s)) { 308 return true; 309 } 310 311 fn_gvec_ptr = (a->size == MO_16) ? 312 gen_helper_gvec_fcmlah_idx : gen_helper_gvec_fcmlas_idx; 313 opr_sz = (1 + a->q) * 8; 314 fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); 315 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 316 vfp_reg_offset(1, a->vn), 317 vfp_reg_offset(1, a->vm), 318 fpst, opr_sz, opr_sz, 319 (a->index << 2) | a->rot, fn_gvec_ptr); 320 tcg_temp_free_ptr(fpst); 321 return true; 322} 323 324static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a) 325{ 326 gen_helper_gvec_3 *fn_gvec; 327 int opr_sz; 328 TCGv_ptr fpst; 329 330 if (!dc_isar_feature(aa32_dp, s)) { 331 return false; 332 } 333 334 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 335 if (!dc_isar_feature(aa32_simd_r32, s) && 336 ((a->vd | a->vn) & 0x10)) { 337 return false; 338 } 339 340 if ((a->vd | a->vn) & a->q) { 341 return false; 342 } 343 344 if (!vfp_access_check(s)) { 345 return true; 346 } 347 348 fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; 349 opr_sz = (1 + a->q) * 8; 350 fpst = fpstatus_ptr(FPST_STD); 351 tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd), 352 vfp_reg_offset(1, a->vn), 353 vfp_reg_offset(1, a->rm), 354 opr_sz, opr_sz, a->index, fn_gvec); 355 tcg_temp_free_ptr(fpst); 356 return true; 357} 358 359static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) 360{ 361 int opr_sz; 362 363 if (!dc_isar_feature(aa32_fhm, s)) { 364 return false; 365 } 366 367 /* UNDEF accesses to D16-D31 if they don't exist. */ 368 if (!dc_isar_feature(aa32_simd_r32, s) && 369 ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { 370 return false; 371 } 372 373 if (a->vd & a->q) { 374 return false; 375 } 376 377 if (!vfp_access_check(s)) { 378 return true; 379 } 380 381 opr_sz = (1 + a->q) * 8; 382 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), 383 vfp_reg_offset(a->q, a->vn), 384 vfp_reg_offset(a->q, a->rm), 385 cpu_env, opr_sz, opr_sz, 386 (a->index << 2) | a->s, /* is_2 == 0 */ 387 gen_helper_gvec_fmlal_idx_a32); 388 return true; 389} 390 391static struct { 392 int nregs; 393 int interleave; 394 int spacing; 395} const neon_ls_element_type[11] = { 396 {1, 4, 1}, 397 {1, 4, 2}, 398 {4, 1, 1}, 399 {2, 2, 2}, 400 {1, 3, 1}, 401 {1, 3, 2}, 402 {3, 1, 1}, 403 {1, 1, 1}, 404 {1, 2, 1}, 405 {1, 2, 2}, 406 {2, 1, 1} 407}; 408 409static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn, 410 int stride) 411{ 412 if (rm != 15) { 413 TCGv_i32 base; 414 415 base = load_reg(s, rn); 416 if (rm == 13) { 417 tcg_gen_addi_i32(base, base, stride); 418 } else { 419 TCGv_i32 index; 420 index = load_reg(s, rm); 421 tcg_gen_add_i32(base, base, index); 422 tcg_temp_free_i32(index); 423 } 424 store_reg(s, rn, base); 425 } 
426} 427 428static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) 429{ 430 /* Neon load/store multiple structures */ 431 int nregs, interleave, spacing, reg, n; 432 MemOp endian = s->be_data; 433 int mmu_idx = get_mem_index(s); 434 int size = a->size; 435 TCGv_i64 tmp64; 436 TCGv_i32 addr, tmp; 437 438 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 439 return false; 440 } 441 442 /* UNDEF accesses to D16-D31 if they don't exist */ 443 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 444 return false; 445 } 446 if (a->itype > 10) { 447 return false; 448 } 449 /* Catch UNDEF cases for bad values of align field */ 450 switch (a->itype & 0xc) { 451 case 4: 452 if (a->align >= 2) { 453 return false; 454 } 455 break; 456 case 8: 457 if (a->align == 3) { 458 return false; 459 } 460 break; 461 default: 462 break; 463 } 464 nregs = neon_ls_element_type[a->itype].nregs; 465 interleave = neon_ls_element_type[a->itype].interleave; 466 spacing = neon_ls_element_type[a->itype].spacing; 467 if (size == 3 && (interleave | spacing) != 1) { 468 return false; 469 } 470 471 if (!vfp_access_check(s)) { 472 return true; 473 } 474 475 /* For our purposes, bytes are always little-endian. */ 476 if (size == 0) { 477 endian = MO_LE; 478 } 479 /* 480 * Consecutive little-endian elements from a single register 481 * can be promoted to a larger little-endian operation. 
482 */ 483 if (interleave == 1 && endian == MO_LE) { 484 size = 3; 485 } 486 tmp64 = tcg_temp_new_i64(); 487 addr = tcg_temp_new_i32(); 488 tmp = tcg_const_i32(1 << size); 489 load_reg_var(s, addr, a->rn); 490 for (reg = 0; reg < nregs; reg++) { 491 for (n = 0; n < 8 >> size; n++) { 492 int xs; 493 for (xs = 0; xs < interleave; xs++) { 494 int tt = a->vd + reg + spacing * xs; 495 496 if (a->l) { 497 gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size); 498 neon_store_element64(tt, n, size, tmp64); 499 } else { 500 neon_load_element64(tmp64, tt, n, size); 501 gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size); 502 } 503 tcg_gen_add_i32(addr, addr, tmp); 504 } 505 } 506 } 507 tcg_temp_free_i32(addr); 508 tcg_temp_free_i32(tmp); 509 tcg_temp_free_i64(tmp64); 510 511 gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8); 512 return true; 513} 514 515static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) 516{ 517 /* Neon load single structure to all lanes */ 518 int reg, stride, vec_size; 519 int vd = a->vd; 520 int size = a->size; 521 int nregs = a->n + 1; 522 TCGv_i32 addr, tmp; 523 524 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 525 return false; 526 } 527 528 /* UNDEF accesses to D16-D31 if they don't exist */ 529 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 530 return false; 531 } 532 533 if (size == 3) { 534 if (nregs != 4 || a->a == 0) { 535 return false; 536 } 537 /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */ 538 size = 2; 539 } 540 if (nregs == 1 && a->a == 1 && size == 0) { 541 return false; 542 } 543 if (nregs == 3 && a->a == 1) { 544 return false; 545 } 546 547 if (!vfp_access_check(s)) { 548 return true; 549 } 550 551 /* 552 * VLD1 to all lanes: T bit indicates how many Dregs to write. 553 * VLD2/3/4 to all lanes: T bit indicates register stride. 554 */ 555 stride = a->t ? 2 : 1; 556 vec_size = nregs == 1 ? 
stride * 8 : 8; 557 558 tmp = tcg_temp_new_i32(); 559 addr = tcg_temp_new_i32(); 560 load_reg_var(s, addr, a->rn); 561 for (reg = 0; reg < nregs; reg++) { 562 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), 563 s->be_data | size); 564 if ((vd & 1) && vec_size == 16) { 565 /* 566 * We cannot write 16 bytes at once because the 567 * destination is unaligned. 568 */ 569 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 570 8, 8, tmp); 571 tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1), 572 neon_full_reg_offset(vd), 8, 8); 573 } else { 574 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd), 575 vec_size, vec_size, tmp); 576 } 577 tcg_gen_addi_i32(addr, addr, 1 << size); 578 vd += stride; 579 } 580 tcg_temp_free_i32(tmp); 581 tcg_temp_free_i32(addr); 582 583 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs); 584 585 return true; 586} 587 588static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) 589{ 590 /* Neon load/store single structure to one lane */ 591 int reg; 592 int nregs = a->n + 1; 593 int vd = a->vd; 594 TCGv_i32 addr, tmp; 595 596 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 597 return false; 598 } 599 600 /* UNDEF accesses to D16-D31 if they don't exist */ 601 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 602 return false; 603 } 604 605 /* Catch the UNDEF cases. This is unavoidably a bit messy. 
*/ 606 switch (nregs) { 607 case 1: 608 if (((a->align & (1 << a->size)) != 0) || 609 (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) { 610 return false; 611 } 612 break; 613 case 3: 614 if ((a->align & 1) != 0) { 615 return false; 616 } 617 /* fall through */ 618 case 2: 619 if (a->size == 2 && (a->align & 2) != 0) { 620 return false; 621 } 622 break; 623 case 4: 624 if ((a->size == 2) && ((a->align & 3) == 3)) { 625 return false; 626 } 627 break; 628 default: 629 abort(); 630 } 631 if ((vd + a->stride * (nregs - 1)) > 31) { 632 /* 633 * Attempts to write off the end of the register file are 634 * UNPREDICTABLE; we choose to UNDEF because otherwise we would 635 * access off the end of the array that holds the register data. 636 */ 637 return false; 638 } 639 640 if (!vfp_access_check(s)) { 641 return true; 642 } 643 644 tmp = tcg_temp_new_i32(); 645 addr = tcg_temp_new_i32(); 646 load_reg_var(s, addr, a->rn); 647 /* 648 * TODO: if we implemented alignment exceptions, we should check 649 * addr against the alignment encoded in a->align here. 650 */ 651 for (reg = 0; reg < nregs; reg++) { 652 if (a->l) { 653 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), 654 s->be_data | a->size); 655 neon_store_element(vd, a->reg_idx, a->size, tmp); 656 } else { /* Store */ 657 neon_load_element(tmp, vd, a->reg_idx, a->size); 658 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), 659 s->be_data | a->size); 660 } 661 vd += a->stride; 662 tcg_gen_addi_i32(addr, addr, 1 << a->size); 663 } 664 tcg_temp_free_i32(addr); 665 tcg_temp_free_i32(tmp); 666 667 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs); 668 669 return true; 670} 671 672static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) 673{ 674 int vec_size = a->q ? 
16 : 8; 675 int rd_ofs = neon_full_reg_offset(a->vd); 676 int rn_ofs = neon_full_reg_offset(a->vn); 677 int rm_ofs = neon_full_reg_offset(a->vm); 678 679 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 680 return false; 681 } 682 683 /* UNDEF accesses to D16-D31 if they don't exist. */ 684 if (!dc_isar_feature(aa32_simd_r32, s) && 685 ((a->vd | a->vn | a->vm) & 0x10)) { 686 return false; 687 } 688 689 if ((a->vn | a->vm | a->vd) & a->q) { 690 return false; 691 } 692 693 if (!vfp_access_check(s)) { 694 return true; 695 } 696 697 fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); 698 return true; 699} 700 701#define DO_3SAME(INSN, FUNC) \ 702 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 703 { \ 704 return do_3same(s, a, FUNC); \ 705 } 706 707DO_3SAME(VADD, tcg_gen_gvec_add) 708DO_3SAME(VSUB, tcg_gen_gvec_sub) 709DO_3SAME(VAND, tcg_gen_gvec_and) 710DO_3SAME(VBIC, tcg_gen_gvec_andc) 711DO_3SAME(VORR, tcg_gen_gvec_or) 712DO_3SAME(VORN, tcg_gen_gvec_orc) 713DO_3SAME(VEOR, tcg_gen_gvec_xor) 714DO_3SAME(VSHL_S, gen_gvec_sshl) 715DO_3SAME(VSHL_U, gen_gvec_ushl) 716DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) 717DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) 718DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) 719DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) 720 721/* These insns are all gvec_bitsel but with the inputs in various orders. 
*/ 722#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ 723 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 724 uint32_t rn_ofs, uint32_t rm_ofs, \ 725 uint32_t oprsz, uint32_t maxsz) \ 726 { \ 727 tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \ 728 } \ 729 DO_3SAME(INSN, gen_##INSN##_3s) 730 731DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs) 732DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs) 733DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs) 734 735#define DO_3SAME_NO_SZ_3(INSN, FUNC) \ 736 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 737 { \ 738 if (a->size == 3) { \ 739 return false; \ 740 } \ 741 return do_3same(s, a, FUNC); \ 742 } 743 744DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax) 745DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax) 746DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin) 747DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin) 748DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul) 749DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla) 750DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls) 751DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst) 752DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) 753DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) 754DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) 755DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) 756 757#define DO_3SAME_CMP(INSN, COND) \ 758 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 759 uint32_t rn_ofs, uint32_t rm_ofs, \ 760 uint32_t oprsz, uint32_t maxsz) \ 761 { \ 762 tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \ 763 } \ 764 DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s) 765 766DO_3SAME_CMP(VCGT_S, TCG_COND_GT) 767DO_3SAME_CMP(VCGT_U, TCG_COND_GTU) 768DO_3SAME_CMP(VCGE_S, TCG_COND_GE) 769DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) 770DO_3SAME_CMP(VCEQ, TCG_COND_EQ) 771 772#define WRAP_OOL_FN(WRAPNAME, FUNC) \ 773 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \ 774 uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \ 775 { \ 776 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \ 777 } 778 
779WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b) 780 781static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) 782{ 783 if (a->size != 0) { 784 return false; 785 } 786 return do_3same(s, a, gen_VMUL_p_3s); 787} 788 789#define DO_VQRDMLAH(INSN, FUNC) \ 790 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 791 { \ 792 if (!dc_isar_feature(aa32_rdm, s)) { \ 793 return false; \ 794 } \ 795 if (a->size != 1 && a->size != 2) { \ 796 return false; \ 797 } \ 798 return do_3same(s, a, FUNC); \ 799 } 800 801DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc) 802DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc) 803 804#define DO_SHA1(NAME, FUNC) \ 805 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 806 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 807 { \ 808 if (!dc_isar_feature(aa32_sha1, s)) { \ 809 return false; \ 810 } \ 811 return do_3same(s, a, gen_##NAME##_3s); \ 812 } 813 814DO_SHA1(SHA1C, gen_helper_crypto_sha1c) 815DO_SHA1(SHA1P, gen_helper_crypto_sha1p) 816DO_SHA1(SHA1M, gen_helper_crypto_sha1m) 817DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0) 818 819#define DO_SHA2(NAME, FUNC) \ 820 WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \ 821 static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \ 822 { \ 823 if (!dc_isar_feature(aa32_sha2, s)) { \ 824 return false; \ 825 } \ 826 return do_3same(s, a, gen_##NAME##_3s); \ 827 } 828 829DO_SHA2(SHA256H, gen_helper_crypto_sha256h) 830DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) 831DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) 832 833#define DO_3SAME_64(INSN, FUNC) \ 834 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 835 uint32_t rn_ofs, uint32_t rm_ofs, \ 836 uint32_t oprsz, uint32_t maxsz) \ 837 { \ 838 static const GVecGen3 op = { .fni8 = FUNC }; \ 839 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ 840 } \ 841 DO_3SAME(INSN, gen_##INSN##_3s) 842 843#define DO_3SAME_64_ENV(INSN, FUNC) \ 844 static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ 845 { \ 846 FUNC(d, cpu_env, 
n, m); \ 847 } \ 848 DO_3SAME_64(INSN, gen_##INSN##_elt) 849 850DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) 851DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) 852DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) 853DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) 854DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) 855DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) 856 857#define DO_3SAME_32(INSN, FUNC) \ 858 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 859 uint32_t rn_ofs, uint32_t rm_ofs, \ 860 uint32_t oprsz, uint32_t maxsz) \ 861 { \ 862 static const GVecGen3 ops[4] = { \ 863 { .fni4 = gen_helper_neon_##FUNC##8 }, \ 864 { .fni4 = gen_helper_neon_##FUNC##16 }, \ 865 { .fni4 = gen_helper_neon_##FUNC##32 }, \ 866 { 0 }, \ 867 }; \ 868 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 869 } \ 870 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 871 { \ 872 if (a->size > 2) { \ 873 return false; \ 874 } \ 875 return do_3same(s, a, gen_##INSN##_3s); \ 876 } 877 878/* 879 * Some helper functions need to be passed the cpu_env. In order 880 * to use those with the gvec APIs like tcg_gen_gvec_3() we need 881 * to create wrapper functions whose prototype is a NeonGenTwoOpFn() 882 * and which call a NeonGenTwoOpEnvFn(). 
883 */ 884#define WRAP_ENV_FN(WRAPNAME, FUNC) \ 885 static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ 886 { \ 887 FUNC(d, cpu_env, n, m); \ 888 } 889 890#define DO_3SAME_32_ENV(INSN, FUNC) \ 891 WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ 892 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ 893 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ 894 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 895 uint32_t rn_ofs, uint32_t rm_ofs, \ 896 uint32_t oprsz, uint32_t maxsz) \ 897 { \ 898 static const GVecGen3 ops[4] = { \ 899 { .fni4 = gen_##INSN##_tramp8 }, \ 900 { .fni4 = gen_##INSN##_tramp16 }, \ 901 { .fni4 = gen_##INSN##_tramp32 }, \ 902 { 0 }, \ 903 }; \ 904 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ 905 } \ 906 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 907 { \ 908 if (a->size > 2) { \ 909 return false; \ 910 } \ 911 return do_3same(s, a, gen_##INSN##_3s); \ 912 } 913 914DO_3SAME_32(VHADD_S, hadd_s) 915DO_3SAME_32(VHADD_U, hadd_u) 916DO_3SAME_32(VHSUB_S, hsub_s) 917DO_3SAME_32(VHSUB_U, hsub_u) 918DO_3SAME_32(VRHADD_S, rhadd_s) 919DO_3SAME_32(VRHADD_U, rhadd_u) 920DO_3SAME_32(VRSHL_S, rshl_s) 921DO_3SAME_32(VRSHL_U, rshl_u) 922 923DO_3SAME_32_ENV(VQSHL_S, qshl_s) 924DO_3SAME_32_ENV(VQSHL_U, qshl_u) 925DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) 926DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) 927 928static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) 929{ 930 /* Operations handled pairwise 32 bits at a time */ 931 TCGv_i32 tmp, tmp2, tmp3; 932 933 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 934 return false; 935 } 936 937 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 938 if (!dc_isar_feature(aa32_simd_r32, s) && 939 ((a->vd | a->vn | a->vm) & 0x10)) { 940 return false; 941 } 942 943 if (a->size == 3) { 944 return false; 945 } 946 947 if (!vfp_access_check(s)) { 948 return true; 949 } 950 951 assert(a->q == 0); /* enforced by decode patterns */ 952 953 /* 954 * Note that we have to be careful not to clobber the source operands 955 * in the "vm == vd" case by storing the result of the first pass too 956 * early. Since Q is 0 there are always just two passes, so instead 957 * of a complicated loop over each pass we just unroll. 958 */ 959 tmp = tcg_temp_new_i32(); 960 tmp2 = tcg_temp_new_i32(); 961 tmp3 = tcg_temp_new_i32(); 962 963 read_neon_element32(tmp, a->vn, 0, MO_32); 964 read_neon_element32(tmp2, a->vn, 1, MO_32); 965 fn(tmp, tmp, tmp2); 966 967 read_neon_element32(tmp3, a->vm, 0, MO_32); 968 read_neon_element32(tmp2, a->vm, 1, MO_32); 969 fn(tmp3, tmp3, tmp2); 970 971 write_neon_element32(tmp, a->vd, 0, MO_32); 972 write_neon_element32(tmp3, a->vd, 1, MO_32); 973 974 tcg_temp_free_i32(tmp); 975 tcg_temp_free_i32(tmp2); 976 tcg_temp_free_i32(tmp3); 977 return true; 978} 979 980#define DO_3SAME_PAIR(INSN, func) \ 981 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 982 { \ 983 static NeonGenTwoOpFn * const fns[] = { \ 984 gen_helper_neon_##func##8, \ 985 gen_helper_neon_##func##16, \ 986 gen_helper_neon_##func##32, \ 987 }; \ 988 if (a->size > 2) { \ 989 return false; \ 990 } \ 991 return do_3same_pair(s, a, fns[a->size]); \ 992 } 993 994/* 32-bit pairwise ops end up the same as the elementwise versions. 
*/ 995#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 996#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 997#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 998#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 999#define gen_helper_neon_padd_u32 tcg_gen_add_i32 1000 1001DO_3SAME_PAIR(VPMAX_S, pmax_s) 1002DO_3SAME_PAIR(VPMIN_S, pmin_s) 1003DO_3SAME_PAIR(VPMAX_U, pmax_u) 1004DO_3SAME_PAIR(VPMIN_U, pmin_u) 1005DO_3SAME_PAIR(VPADD, padd_u) 1006 1007#define DO_3SAME_VQDMULH(INSN, FUNC) \ 1008 WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ 1009 WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ 1010 static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ 1011 uint32_t rn_ofs, uint32_t rm_ofs, \ 1012 uint32_t oprsz, uint32_t maxsz) \ 1013 { \ 1014 static const GVecGen3 ops[2] = { \ 1015 { .fni4 = gen_##INSN##_tramp16 }, \ 1016 { .fni4 = gen_##INSN##_tramp32 }, \ 1017 }; \ 1018 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ 1019 } \ 1020 static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ 1021 { \ 1022 if (a->size != 1 && a->size != 2) { \ 1023 return false; \ 1024 } \ 1025 return do_3same(s, a, gen_##INSN##_3s); \ 1026 } 1027 1028DO_3SAME_VQDMULH(VQDMULH, qdmulh) 1029DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) 1030 1031#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \ 1032 static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ 1033 uint32_t rn_ofs, uint32_t rm_ofs, \ 1034 uint32_t oprsz, uint32_t maxsz) \ 1035 { \ 1036 TCGv_ptr fpst = fpstatus_ptr(FPST); \ 1037 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \ 1038 oprsz, maxsz, 0, FUNC); \ 1039 tcg_temp_free_ptr(fpst); \ 1040 } 1041 1042#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \ 1043 WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \ 1044 WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \ 1045 static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ 1046 { \ 1047 if (a->size == MO_16) { \ 1048 if (!dc_isar_feature(aa32_fp16_arith, 
s)) {             \
                return false;                                               \
            }                                                               \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                    \
        }                                                                   \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                        \
    }


/*
 * Instantiate the 3-same FP translators. Each line names the insn
 * followed by two gvec helpers, the _s one first and the _h one second.
 */
DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)

/*
 * VMAXNM/VMINNM expanders: the _s helpers are paired with FPST_STD and
 * the _h helpers with FPST_STD_F16.
 */
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

/*
 * VMAXNM: rejected (return false) without ARM_FEATURE_V8; the
 * a->size == MO_16 form additionally requires aa32_fp16_arith.
 */
static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

/*
 * VMINNM: same feature checks as VMAXNM above, dispatching to the
 * VMINNM expanders instead.
 */
static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}

/*
 * Emit one FP pairwise operation through tcg_gen_gvec_3_ptr(), with an
 * operation size and maximum size of 8 bytes. 'fn' is the gvec helper
 * to call; the float status passed to it is FPST_STD_F16 when
 * a->size == MO_16 and FPST_STD otherwise.
 */
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
                             gen_helper_gvec_3_ptr *fn)
{
    /* FP pairwise operations */
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /*
     * NOTE(review): presumably vfp_access_check() has already emitted
     * the exception when it fails, which is why the insn is still
     * reported as handled -- confirm against its definition.
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */


    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpstatus, 8, 8, 0, fn);
    tcg_temp_free_ptr(fpstatus);

    return true;
}

/*
 * For all the functions using this macro, a->size == MO_16 selects the
 * fp16 helper (FUNC##h), which is rejected (return false) unless the
 * aa32_fp16_arith feature is present; any other size uses the fp32
 * helper (FUNC##s).
 */
#define DO_3S_FP_PAIR(INSN,FUNC)                                    \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
    {                                                               \
        if (a->size == MO_16) {                                     \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {             \
                return false;                                       \
            }                                                       \
            return do_3same_fp_pair(s, a, FUNC##h);                 \
        }                                                           \
        return do_3same_fp_pair(s, a, FUNC##s);                     \
    }

DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)

/*
 * Common code for vectorizable 2-reg-shift insns. After the feature,
 * register-bank and Q-register checks, 'fn' is invoked as
 * fn(size, rd_ofs, rm_ofs, shift, vec_size, vec_size), where vec_size
 * is 16 when a->q is set and 8 otherwise.
 */
static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* With q set, reject odd-numbered vd or vm. */
    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

/* Define trans_<INSN>_2sh as a thin wrapper around do_vector_2sh(). */
#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }                                                                   \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)

/*
 * VSHR (signed). Note that this clamps a->shift in place to at most
 * (element width in bits - 1) before delegating to do_vector_2sh().
 */
static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed shift out of range results in all-sign-bits */
    a->shift = MIN(a->shift, (8 << a->size) - 1);
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}

/*
 * GVecGen2iFn-shaped expander that ignores rm_ofs and shift and simply
 * fills the destination with zero.
 */
static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}

/*
 * VSHR (unsigned): a shift count of at least the element width writes
 * zero to the destination; otherwise a logical right shift is emitted.
 */
static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Shift out of range is architecturally valid and results in zero. */
    if (a->shift >= (8 << a->size)) {
        return do_vector_2sh(s, a, gen_zero_rd_2sh);
    } else {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }
}

/*
 * Apply 'fn' to each 64-bit element of Vm (one pass, or two when a->q
 * is set), passing cpu_env and the shift count as a constant, and
 * store each result to the same element of Vd.
 */
static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwo64OpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size == 3 case, where the
     * function needs to be passed cpu_env.
     */
    TCGv_i64 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* With q set, reject odd-numbered vd or vm. */
    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    /*
     * NOTE(review): presumably vfp_access_check() has already emitted
     * the exception when it fails, which is why the insn is still
     * reported as handled -- confirm against its definition.
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i64(dup_const(a->size, a->shift));

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 tmp = tcg_temp_new_i64();

        read_neon_element64(tmp, a->vm, pass, MO_64);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element64(tmp, a->vd, pass, MO_64);
        tcg_temp_free_i64(tmp);
    }
    tcg_temp_free_i64(constimm);
    return true;
}

/*
 * 32-bit counterpart of do_2shift_env_64(): processes Vm in 32-bit
 * units (two passes, or four when a->q is set). The shift count is
 * replicated across the 32-bit constant by dup_const(a->size, ...).
 */
static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwoOpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size < 3 case, where the
     * helper needs to be passed cpu_env.
     */
    TCGv_i32 constimm, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i32(dup_const(a->size, a->shift));
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(constimm);
    return true;
}

/*
 * Define both translators for an env-taking shift insn:
 * trans_<INSN>_64_2sh uses the ...64 helper via do_2shift_env_64(), and
 * trans_<INSN>_2sh picks the ...8/...16/...32 helper by a->size.
 */
#define DO_2SHIFT_ENV(INSN, FUNC)                                       \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    {                                                                   \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
    }                                                                   \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        static NeonGenTwoOpEnvFn * const fns[] = {                      \
            gen_helper_neon_##FUNC##8,                                  \
            gen_helper_neon_##FUNC##16,                                 \
            gen_helper_neon_##FUNC##32,                                 \
        };                                                              \
        assert(a->size < ARRAY_SIZE(fns));                              \
        return do_2shift_env_32(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)

/*
 * Shift both 64-bit elements of Vm with 'shiftfn', narrow each result
 * to 32 bits with 'narrowfn', and store them as 32-bit elements 0 and 1
 * of Vd. Vm must be even-numbered.
 */
static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
    constimm = tcg_const_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();
    rd = tcg_temp_new_i32();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    read_neon_element64(rm1, a->vm, 0, MO_64);
    read_neon_element64(rm2, a->vm, 1, MO_64);

    shiftfn(rm1, rm1, constimm);
    narrowfn(rd, cpu_env, rm1);
    write_neon_element32(rd, a->vd, 0, MO_32);

    shiftfn(rm2, rm2, constimm);
    narrowfn(rd, cpu_env, rm2);
    write_neon_element32(rd, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd);
    tcg_temp_free_i64(rm1);
    tcg_temp_free_i64(rm2);
    tcg_temp_free_i64(constimm);

    return true;
}

/*
 * As do_2shift_narrow_64(), but for narrower elements: the four 32-bit
 * units of Vm are shifted individually, concatenated pairwise into a
 * 64-bit value, and each pair is narrowed into one 32-bit unit of Vd.
 */
static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_const_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = tcg_temp_new_i32();
    rm2 = tcg_temp_new_i32();
    rm3 = tcg_temp_new_i32();
    rm4 = tcg_temp_new_i32();
    read_neon_element32(rm1, a->vm, 0, MO_32);
    read_neon_element32(rm2, a->vm, 1, MO_32);
    read_neon_element32(rm3, a->vm, 2, MO_32);
    read_neon_element32(rm4, a->vm, 3, MO_32);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
    tcg_temp_free_i32(rm2);

    /* rm1 is reused as the destination of the narrowed result. */
    narrowfn(rm1, cpu_env, rtmp);
    write_neon_element32(rm1, a->vd, 0, MO_32);
    tcg_temp_free_i32(rm1);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);
    tcg_temp_free_i32(constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
    tcg_temp_free_i32(rm4);

    narrowfn(rm3, cpu_env, rtmp);
    tcg_temp_free_i64(rtmp);
    write_neon_element32(rm3, a->vd, 1, MO_32);
    tcg_temp_free_i32(rm3);
    return true;
}

/* Define trans_<INSN>_2sh in terms of do_2shift_narrow_64(). */
#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
/* Define trans_<INSN>_2sh in terms of do_2shift_narrow_32(). */
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }

/*
 * The three functions below take an 'env' argument that they do not
 * use, so that they can be passed as the NARROWFUNC of DO_2SN_*.
 */
static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}

DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)

/*
 * VSHLL: widen each 32-bit half of Vm to 64 bits with 'widenfn', shift
 * left by a->shift and store to the two 64-bit elements of Vd.
 * Vd must be even-numbered. 'u' is true for the unsigned form.
 */
static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }

    /* Both halves of Vm are loaded before anything is written to Vd. */
    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    tcg_temp_free_i32(rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 0, MO_64);

    widenfn(tmp, rm1);
    tcg_temp_free_i32(rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 1, MO_64);
    tcg_temp_free_i64(tmp);
    return true;
}

/*
 * The widenfn tables below have three entries and are indexed by
 * a->size without a bounds check.
 * NOTE(review): presumably the decoder restricts a->size to 0..2 for
 * these patterns -- confirm against the decode file.
 */
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}

/*
 * Emit a 2-reg-and-shift FP operation through tcg_gen_gvec_2_ptr().
 * a->shift is passed through as the helper's immediate data, and the
 * float status is FPST_STD_F16 when a->size == MO_16, else FPST_STD.
 * The MO_16 form is rejected unless aa32_fp16_arith is present.
 */
static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
                      gen_helper_gvec_2_ptr *fn)
{
    /* FP operations in 2-reg-and-shift group */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);
    TCGv_ptr fpst;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
    tcg_temp_free_ptr(fpst);
    return true;
}

/* Define trans_<INSN>_2sh as a thin wrapper around do_fp_2sh(). */
#define DO_FP_2SH(INSN, FUNC)                                           \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_fp_2sh(s, a, FUNC);                                   \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)

/*
 * Expand an encoded immediate into a 64-bit constant according to
 * 'cmode' and 'op'. Except for cmode == 14 with op set, the result is a
 * 32-bit pattern (inverted when op is set) replicated into both halves
 * of the return value. The switch has no default, so a cmode outside
 * 0..15 leaves imm unchanged before the inversion/replication step.
 */
static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
{
    /*
     * Expand the encoded constant.
     * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
     * We choose to not special-case this and will behave as if a
     * valid constant encoding of 0 had been given.
     * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
     */
    switch (cmode) {
    case 0: case 1:
        /* no-op */
        break;
    case 2: case 3:
        imm <<= 8;
        break;
    case 4: case 5:
        imm <<= 16;
        break;
    case 6: case 7:
        imm <<= 24;
        break;
    case 8: case 9:
        imm |= imm << 16;
        break;
    case 10: case 11:
        imm = (imm << 8) | (imm << 24);
        break;
    case 12:
        imm = (imm << 8) | 0xff;
        break;
    case 13:
        imm = (imm << 16) | 0xffff;
        break;
    case 14:
        if (op) {
            /*
             * This is the only case where the top and bottom 32 bits
             * of the encoded constant differ.
             */
            uint64_t imm64 = 0;
            int n;

            /* Bit n of imm selects whether byte n of the result is 0xff. */
            for (n = 0; n < 8; n++) {
                if (imm & (1 << n)) {
                    imm64 |= (0xffULL << (n * 8));
                }
            }
            return imm64;
        }
        imm |= (imm << 8) | (imm << 16) | (imm << 24);
        break;
    case 15:
        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
        break;
    }
    if (op) {
        imm = ~imm;
    }
    return dup_const(MO_32, imm);
}

/*
 * Common code for one-register-and-immediate insns. The expanded
 * constant is handed to 'fn' with MO_64 as the element size and with
 * the Vd offset used as both destination and source.
 */
static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                        GVecGen2iFn *fn)
{
    uint64_t imm;
    int reg_ofs, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    reg_ofs = neon_full_reg_offset(a->vd);
    vec_size = a->q ? 16 : 8;
    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
    return true;
}

/*
 * GVecGen2iFn-shaped expander for the VMOV case: ignores vece and aofs
 * and fills the destination with the 64-bit constant c.
 */
static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    GVecGen2iFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        /* for op=1, the imm will be inverted, so BIC becomes AND. */
        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        fn = gen_VMOV_1r;
    }
    return do_1reg_imm(s, a, fn);
}

/*
 * For each source, a mop >= 0 means the 64-bit operand is read directly
 * with read_neon_element64() using that MemOp; a negative mop means a
 * 32-bit unit is read and then widened with 'widenfn'.
 */
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
                           NeonGenWidenFn *widenfn,
                           NeonGenTwo64OpFn *opfn,
                           int src1_mop, int src2_mop)
{
    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
    TCGv_i64 rn0_64, rn1_64, rm_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* Vd must be even-numbered, and so must Vn when it is read as MO_Q. */
    if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn0_64 = tcg_temp_new_i64();
    rn1_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();

    if (src1_mop >= 0) {
        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 0, MO_32);
        widenfn(rn0_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 0, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 0, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    opfn(rn0_64, rn0_64, rm_64);

    /*
     * Load second pass inputs before storing the first pass result, to
     * avoid incorrect results if a narrow input overlaps with the result.
     */
    if (src1_mop >= 0) {
        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 1, MO_32);
        widenfn(rn1_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 1, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 1, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);

    opfn(rn1_64, rn1_64, rm_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);

    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

/*
 * Define trans_<INSN>_3d for a prewidening insn. For a->size == MO_32
 * the narrow operands are read directly with MO_32 | SIGN; for other
 * sizes narrow_mop is -1 and the widenfn table entry is used instead.
 * SRC1WIDE selects MO_Q for the first source.
 */
#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenWidenFn * const widenfn[] = {                     \
            gen_helper_neon_widen_##S##8,                               \
            gen_helper_neon_widen_##S##16,                              \
            NULL, NULL,                                                 \
        };                                                              \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
                              SRC1WIDE ? MO_Q : narrow_mop,             \
                              narrow_mop);                              \
    }

DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)

/*
 * Combine the 64-bit elements of Vn and Vm with 'opfn', narrow each
 * result to 32 bits with 'narrowfn', and store the two results to Vd.
 * Both results are computed before either is written back.
 */
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    read_neon_element64(rn_64, a->vn, 0, MO_64);
    read_neon_element64(rm_64, a->vm, 0, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    read_neon_element64(rn_64, a->vn, 1, MO_64);
    read_neon_element64(rm_64, a->vm, 1, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rn_64);
    tcg_temp_free_i64(rm_64);

    return true;
}

/*
 * Define trans_<INSN>_3d for a narrowing insn; both tables are indexed
 * by a->size and their NULL entries make do_narrow_3d() return false.
 */
#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        static NeonGenNarrowFn * const narrowfn[] = {                   \
            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
            EXTOP,                                                      \
            NULL,                                                       \
        };                                                              \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
    }

/*
 * Add 1 << 31 to rn and return its high 32 bits in rd.
 * Note that rn is modified in place.
 */
static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)

/*
 * 'opfn' produces a 64-bit result from two 32-bit inputs; 'accfn', if
 * not NULL, is called as accfn(result, old Vd element, result) to fold
 * that result into the existing destination value. Vd must be
 * even-numbered.
 */
static bool do_long_3d(DisasContext *s, arg_3diff *a,
                       NeonGenTwoOpWidenFn *opfn,
                       NeonGenTwo64OpFn *accfn)
{
    /*
     * 3-regs different lengths, long operations.
     * These perform an operation on two inputs that returns a double-width
     * result, and then possibly perform an accumulation operation of
     * that result into the double-width destination.
     */
    TCGv_i64 rd0, rd1, tmp;
    TCGv_i32 rn, rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd0 = tcg_temp_new_i64();
    rd1 = tcg_temp_new_i64();

    rn = tcg_temp_new_i32();
    rm = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    read_neon_element32(rm, a->vm, 0, MO_32);
    opfn(rd0, rn, rm);

    read_neon_element32(rn, a->vn, 1, MO_32);
    read_neon_element32(rm, a->vm, 1, MO_32);
    opfn(rd1, rn, rm);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(rm);

    /* Don't store results until after all loads: they might overlap */
    if (accfn) {
        tmp = tcg_temp_new_i64();
        read_neon_element64(tmp, a->vd, 0, MO_64);
        accfn(rd0, tmp, rd0);
        read_neon_element64(tmp, a->vd, 1, MO_64);
        accfn(rd1, tmp, rd1);
        tcg_temp_free_i64(tmp);
    }

    write_neon_element64(rd0, a->vd, 0, MO_64);
    write_neon_element64(rd1, a->vd, 1, MO_64);
    tcg_temp_free_i64(rd0);
    tcg_temp_free_i64(rd1);

    return true;
}

/*
 * In the translators below each table is indexed by a->size; a NULL
 * opfn entry makes do_long_3d() return false for that size.
 */
static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

/* rd = full 64-bit signed product of the 32-bit values rn and rm. */
static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_muls2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}

/* rd = full 64-bit unsigned product of the 32-bit values rn and rm. */
static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_mulu2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}

static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_s8,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_u8,
        gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

/*
 * Define trans_<INSN>_3d for a long multiply-accumulate insn: MULL
 * names the widening multiply family and ACC the accumulate operation.
 */
#define DO_VMLAL(INSN,MULL,ACC)                                         \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            gen_helper_neon_##MULL##8,                                  \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            gen_helper_neon_##ACC##l_u16,                               \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
    }

DO_VMLAL(VMLAL_S,mull_s,add)
DO_VMLAL(VMLAL_U,mull_u,add)
DO_VMLAL(VMLSL_S,mull_s,sub)
DO_VMLAL(VMLSL_U,mull_u,sub)

/*
 * Widening multiply followed by a saturating add of the product to
 * itself, i.e. the product is doubled with saturation.
 */
static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_helper_neon_mull_s16(rd, rn, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
}

static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_mull_s32(rd, rn, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
}

static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}

/* Accumulate step for VQDMLAL: saturating add of rn and rm into rd. */
static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}

/*
 * Accumulate step for VQDMLSL: rm is negated in place and then added
 * to rn with saturation.
 */
static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_negl_u32(rm, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_neg_i64(rm, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}

/*
 * Polynomial VMULL. Only a->size 0 and 2 are accepted, and size 2
 * additionally requires the aa32_pmull feature. The operation is
 * emitted as a 16-byte out-of-line gvec call.
 */
static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
{
    gen_helper_gvec_3 *fn_gvec;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    switch (a->size) {
    case 0:
        fn_gvec = gen_helper_neon_pmull_h;
        break;
    case 2:
        if (!dc_isar_feature(aa32_pmull, s)) {
            return false;
        }
        fn_gvec = gen_helper_gvec_pmull_q;
        break;
    default:
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
                       neon_full_reg_offset(a->vn),
                       neon_full_reg_offset(a->vm),
                       16, 16, 0, fn_gvec);
    return true;
}

/* Replicate the low 16 bits of var into both halves of var. */
static void gen_neon_dup_low16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_shli_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Replicate the high 16 bits of var into both halves of var. */
static void gen_neon_dup_high16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_andi_i32(var, var, 0xffff0000);
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/*
 * Load the scalar operand into a newly allocated temporary and return
 * it. For MO_16 the 32-bit unit is read from register (reg & 7), index
 * (reg >> 4), and bit 3 of 'reg' selects whether its high or low half
 * is duplicated into both halves. Otherwise the 32-bit unit is read
 * from register (reg & 15), index (reg >> 4), unmodified.
 * The temporary is not freed here; do_2scalar() frees it after use.
 */
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (size == MO_16) {
        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
        if (reg & 8) {
            gen_neon_dup_high16(tmp);
        } else {
            gen_neon_dup_low16(tmp);
        }
    } else {
        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
    }
    return tmp;
}

/*
 * 'opfn' combines each 32-bit unit of Vn with the scalar; 'accfn', if
 * not NULL, is called as accfn(result, old Vd unit, result) before the
 * store to Vd. Two passes are made, or four when a->q is set.
 */
static bool do_2scalar(DisasContext *s, arg_2scalar *a,
                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
{
    /*
     * Two registers and a scalar: perform an operation between
     * the input elements and the scalar, and then possibly
     * perform an accumulation operation of that result into the
     * destination.
     */
    TCGv_i32 scalar, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* The scalar is read once, before any element of Vd is written. */
    scalar = neon_get_scalar(a->size, a->vm);
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vn, pass, MO_32);
        opfn(tmp, tmp, scalar);
        if (accfn) {
            TCGv_i32 rd = tcg_temp_new_i32();
            read_neon_element32(rd, a->vd, pass, MO_32);
            accfn(tmp, rd, tmp);
            tcg_temp_free_i32(rd);
        }
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(scalar);
    return true;
}

/*
 * In the by-scalar translators below only a->size 1 and 2 have table
 * entries; the NULL entries make do_2scalar() return false.
 */
static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_add_u16,
        tcg_gen_add_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn
* const accfn[] = {
        NULL,
        gen_helper_neon_sub_u16,
        tcg_gen_sub_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

/*
 * Floating-point "two registers and a scalar" operation implemented as
 * one gvec call.  fn is the gvec helper to use; a NULL fn makes the
 * function return false.  Note that a->vm is overwritten in place with
 * the bare register number once the index has been split off.
 */
static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
                              gen_helper_gvec_3_ptr *fn)
{
    /* Two registers and a scalar, using gvec */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs;
    int idx;
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    /* With q set, Vd and Vn must both be even register numbers. */
    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* a->vm is M:Vm, which encodes both register and index */
    idx = extract32(a->vm, a->size + 2, 2);
    a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_full_reg_offset(a->vm);

    /* size == 1 uses the FPST_STD_F16 status, everything else FPST_STD. */
    fpstatus = fpstatus_ptr(a->size == 1 ?
FPST_STD_F16 : FPST_STD);
    /* The element index is passed to the helper as the gvec data word. */
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    tcg_temp_free_ptr(fpstatus);
    return true;
}

/*
 * Define trans_<NAME>_F_2sc.  The helper is chosen by a->size (the _h
 * variant for size 1, the _s variant for size 2, NULL otherwise); the
 * MO_16 form is rejected unless the aa32_fp16_arith feature is present.
 */
#define DO_VMUL_F_2sc(NAME, FUNC) \
    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
    { \
        static gen_helper_gvec_3_ptr * const opfn[] = { \
            NULL, \
            gen_helper_##FUNC##_h, \
            gen_helper_##FUNC##_s, \
            NULL, \
        }; \
        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
            return false; \
        } \
        return do_2scalar_fp_vec(s, a, opfn[a->size]); \
    }

DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)

/*
 * Wrappers around the saturating-doubling-multiply helpers, generated
 * by WRAP_ENV_FN (defined elsewhere in this file).
 */
WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)

/* VQDMULH by scalar: sizes 1 and 2 only, no accumulate step. */
static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQDMULH_16,
        gen_VQDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

/* VQRDMULH by scalar: sizes 1 and 2 only, no accumulate step. */
static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_VQRDMULH_16,
        gen_VQRDMULH_32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
2555 */ 2556 TCGv_i32 scalar, rn, rd; 2557 int pass; 2558 2559 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2560 return false; 2561 } 2562 2563 if (!dc_isar_feature(aa32_rdm, s)) { 2564 return false; 2565 } 2566 2567 /* UNDEF accesses to D16-D31 if they don't exist. */ 2568 if (!dc_isar_feature(aa32_simd_r32, s) && 2569 ((a->vd | a->vn | a->vm) & 0x10)) { 2570 return false; 2571 } 2572 2573 if (!opfn) { 2574 /* Bad size (including size == 3, which is a different insn group) */ 2575 return false; 2576 } 2577 2578 if (a->q && ((a->vd | a->vn) & 1)) { 2579 return false; 2580 } 2581 2582 if (!vfp_access_check(s)) { 2583 return true; 2584 } 2585 2586 scalar = neon_get_scalar(a->size, a->vm); 2587 rn = tcg_temp_new_i32(); 2588 rd = tcg_temp_new_i32(); 2589 2590 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 2591 read_neon_element32(rn, a->vn, pass, MO_32); 2592 read_neon_element32(rd, a->vd, pass, MO_32); 2593 opfn(rd, cpu_env, rn, scalar, rd); 2594 write_neon_element32(rd, a->vd, pass, MO_32); 2595 } 2596 tcg_temp_free_i32(rn); 2597 tcg_temp_free_i32(rd); 2598 tcg_temp_free_i32(scalar); 2599 2600 return true; 2601} 2602 2603static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a) 2604{ 2605 static NeonGenThreeOpEnvFn *opfn[] = { 2606 NULL, 2607 gen_helper_neon_qrdmlah_s16, 2608 gen_helper_neon_qrdmlah_s32, 2609 NULL, 2610 }; 2611 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2612} 2613 2614static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a) 2615{ 2616 static NeonGenThreeOpEnvFn *opfn[] = { 2617 NULL, 2618 gen_helper_neon_qrdmlsh_s16, 2619 gen_helper_neon_qrdmlsh_s32, 2620 NULL, 2621 }; 2622 return do_vqrdmlah_2sc(s, a, opfn[a->size]); 2623} 2624 2625static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, 2626 NeonGenTwoOpWidenFn *opfn, 2627 NeonGenTwo64OpFn *accfn) 2628{ 2629 /* 2630 * Two registers and a scalar, long operations: perform an 2631 * operation on the input elements and the scalar which produces 2632 * a double-width result, and then 
possibly perform an accumulation 2633 * operation of that result into the destination. 2634 */ 2635 TCGv_i32 scalar, rn; 2636 TCGv_i64 rn0_64, rn1_64; 2637 2638 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2639 return false; 2640 } 2641 2642 /* UNDEF accesses to D16-D31 if they don't exist. */ 2643 if (!dc_isar_feature(aa32_simd_r32, s) && 2644 ((a->vd | a->vn | a->vm) & 0x10)) { 2645 return false; 2646 } 2647 2648 if (!opfn) { 2649 /* Bad size (including size == 3, which is a different insn group) */ 2650 return false; 2651 } 2652 2653 if (a->vd & 1) { 2654 return false; 2655 } 2656 2657 if (!vfp_access_check(s)) { 2658 return true; 2659 } 2660 2661 scalar = neon_get_scalar(a->size, a->vm); 2662 2663 /* Load all inputs before writing any outputs, in case of overlap */ 2664 rn = tcg_temp_new_i32(); 2665 read_neon_element32(rn, a->vn, 0, MO_32); 2666 rn0_64 = tcg_temp_new_i64(); 2667 opfn(rn0_64, rn, scalar); 2668 2669 read_neon_element32(rn, a->vn, 1, MO_32); 2670 rn1_64 = tcg_temp_new_i64(); 2671 opfn(rn1_64, rn, scalar); 2672 tcg_temp_free_i32(rn); 2673 tcg_temp_free_i32(scalar); 2674 2675 if (accfn) { 2676 TCGv_i64 t64 = tcg_temp_new_i64(); 2677 read_neon_element64(t64, a->vd, 0, MO_64); 2678 accfn(rn0_64, t64, rn0_64); 2679 read_neon_element64(t64, a->vd, 1, MO_64); 2680 accfn(rn1_64, t64, rn1_64); 2681 tcg_temp_free_i64(t64); 2682 } 2683 2684 write_neon_element64(rn0_64, a->vd, 0, MO_64); 2685 write_neon_element64(rn1_64, a->vd, 1, MO_64); 2686 tcg_temp_free_i64(rn0_64); 2687 tcg_temp_free_i64(rn1_64); 2688 return true; 2689} 2690 2691static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a) 2692{ 2693 static NeonGenTwoOpWidenFn * const opfn[] = { 2694 NULL, 2695 gen_helper_neon_mull_s16, 2696 gen_mull_s32, 2697 NULL, 2698 }; 2699 2700 return do_2scalar_long(s, a, opfn[a->size], NULL); 2701} 2702 2703static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a) 2704{ 2705 static NeonGenTwoOpWidenFn * const opfn[] = { 2706 NULL, 2707 
gen_helper_neon_mull_u16,
        gen_mull_u32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

/*
 * Define trans_<INSN>_2sc for the widening multiply-accumulate by
 * scalar insns.  Both tables are indexed by a->size and only sizes 1
 * and 2 are populated: opfn is the widening multiply (MULL), accfn the
 * 64-bit accumulate step (ACC).
 */
#define DO_VMLAL_2SC(INSN, MULL, ACC) \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
    { \
        static NeonGenTwoOpWidenFn * const opfn[] = { \
            NULL, \
            gen_helper_neon_##MULL##16, \
            gen_##MULL##32, \
            NULL, \
        }; \
        static NeonGenTwo64OpFn * const accfn[] = { \
            NULL, \
            gen_helper_neon_##ACC##l_u32, \
            tcg_gen_##ACC##_i64, \
            NULL, \
        }; \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
    }

DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)

/* VQDMULL by scalar: sizes 1 and 2 only, no accumulate step. */
static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], NULL);
}

/* VQDMLAL by scalar: VQDMULL product plus the gen_VQDMLAL_acc_* step. */
static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}

/* VQDMLSL by scalar: VQDMULL product plus the gen_VQDMLSL_acc_* step. */
static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
}

/*
 * VEXT: build the destination from a window, starting a->imm bytes in,
 * over the concatenation of Vn (low part) and Vm (high part).
 */
static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* a->q is used as a bit mask: with q set, odd registers are rejected. */
    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    /* Without q only byte offsets 0..7 are accepted. */
    if (a->imm > 7 && !a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!a->q) {
        /* Extract 64 bits from <Vm:Vn> */
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        read_neon_element64(right, a->vn, 0, MO_64);
        read_neon_element64(left, a->vm, 0, MO_64);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        write_neon_element64(dest, a->vd, 0, MO_64);

        tcg_temp_free_i64(left);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(dest);
    } else {
        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

        if (a->imm < 8) {
            /* Window starts in the low half of Vn. */
            read_neon_element64(right, a->vn, 0, MO_64);
            read_neon_element64(middle, a->vn, 1, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            read_neon_element64(left, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
        } else {
            /* Window starts in the high half of Vn; rebase the offset. */
            read_neon_element64(right, a->vn, 1, MO_64);
            read_neon_element64(middle, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            read_neon_element64(left, a->vm, 1, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
        }

        /* All inputs have been read by now, so Vd may overlap Vn or Vm. */
        write_neon_element64(destright, a->vd, 0, MO_64);
        write_neon_element64(destleft, a->vd, 1, MO_64);

        tcg_temp_free_i64(destright);
tcg_temp_free_i64(destleft); 2855 tcg_temp_free_i64(right); 2856 tcg_temp_free_i64(middle); 2857 tcg_temp_free_i64(left); 2858 } 2859 return true; 2860} 2861 2862static bool trans_VTBL(DisasContext *s, arg_VTBL *a) 2863{ 2864 TCGv_i64 val, def; 2865 TCGv_i32 desc; 2866 2867 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2868 return false; 2869 } 2870 2871 /* UNDEF accesses to D16-D31 if they don't exist. */ 2872 if (!dc_isar_feature(aa32_simd_r32, s) && 2873 ((a->vd | a->vn | a->vm) & 0x10)) { 2874 return false; 2875 } 2876 2877 if ((a->vn + a->len + 1) > 32) { 2878 /* 2879 * This is UNPREDICTABLE; we choose to UNDEF to avoid the 2880 * helper function running off the end of the register file. 2881 */ 2882 return false; 2883 } 2884 2885 if (!vfp_access_check(s)) { 2886 return true; 2887 } 2888 2889 desc = tcg_const_i32((a->vn << 2) | a->len); 2890 def = tcg_temp_new_i64(); 2891 if (a->op) { 2892 read_neon_element64(def, a->vd, 0, MO_64); 2893 } else { 2894 tcg_gen_movi_i64(def, 0); 2895 } 2896 val = tcg_temp_new_i64(); 2897 read_neon_element64(val, a->vm, 0, MO_64); 2898 2899 gen_helper_neon_tbl(val, cpu_env, desc, val, def); 2900 write_neon_element64(val, a->vd, 0, MO_64); 2901 2902 tcg_temp_free_i64(def); 2903 tcg_temp_free_i64(val); 2904 tcg_temp_free_i32(desc); 2905 return true; 2906} 2907 2908static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a) 2909{ 2910 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2911 return false; 2912 } 2913 2914 /* UNDEF accesses to D16-D31 if they don't exist. */ 2915 if (!dc_isar_feature(aa32_simd_r32, s) && 2916 ((a->vd | a->vm) & 0x10)) { 2917 return false; 2918 } 2919 2920 if (a->vd & a->q) { 2921 return false; 2922 } 2923 2924 if (!vfp_access_check(s)) { 2925 return true; 2926 } 2927 2928 tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd), 2929 neon_element_offset(a->vm, a->index, a->size), 2930 a->q ? 16 : 8, a->q ? 
16 : 8); 2931 return true; 2932} 2933 2934static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) 2935{ 2936 int pass, half; 2937 TCGv_i32 tmp[2]; 2938 2939 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 2940 return false; 2941 } 2942 2943 /* UNDEF accesses to D16-D31 if they don't exist. */ 2944 if (!dc_isar_feature(aa32_simd_r32, s) && 2945 ((a->vd | a->vm) & 0x10)) { 2946 return false; 2947 } 2948 2949 if ((a->vd | a->vm) & a->q) { 2950 return false; 2951 } 2952 2953 if (a->size == 3) { 2954 return false; 2955 } 2956 2957 if (!vfp_access_check(s)) { 2958 return true; 2959 } 2960 2961 tmp[0] = tcg_temp_new_i32(); 2962 tmp[1] = tcg_temp_new_i32(); 2963 2964 for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 2965 for (half = 0; half < 2; half++) { 2966 read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32); 2967 switch (a->size) { 2968 case 0: 2969 tcg_gen_bswap32_i32(tmp[half], tmp[half]); 2970 break; 2971 case 1: 2972 gen_swap_half(tmp[half], tmp[half]); 2973 break; 2974 case 2: 2975 break; 2976 default: 2977 g_assert_not_reached(); 2978 } 2979 } 2980 write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); 2981 write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); 2982 } 2983 2984 tcg_temp_free_i32(tmp[0]); 2985 tcg_temp_free_i32(tmp[1]); 2986 return true; 2987} 2988 2989static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a, 2990 NeonGenWidenFn *widenfn, 2991 NeonGenTwo64OpFn *opfn, 2992 NeonGenTwo64OpFn *accfn) 2993{ 2994 /* 2995 * Pairwise long operations: widen both halves of the pair, 2996 * combine the pairs with the opfn, and then possibly accumulate 2997 * into the destination with the accfn. 2998 */ 2999 int pass; 3000 3001 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3002 return false; 3003 } 3004 3005 /* UNDEF accesses to D16-D31 if they don't exist. 
*/
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    /* A NULL widenfn marks an unsupported size; opfn is not checked. */
    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* One 64-bit chunk of the source and destination per pass. */
    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i32 tmp;
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();

        /* Widen the two 32-bit words of this chunk separately. */
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
        widenfn(rm0_64, tmp);
        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
        widenfn(rm1_64, tmp);
        tcg_temp_free_i32(tmp);

        opfn(rd_64, rm0_64, rm1_64);
        tcg_temp_free_i64(rm0_64);
        tcg_temp_free_i64(rm1_64);

        if (accfn) {
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            read_neon_element64(tmp64, a->vd, pass, MO_64);
            accfn(rd_64, tmp64, rd_64);
            tcg_temp_free_i64(tmp64);
        }
        write_neon_element64(rd_64, a->vd, pass, MO_64);
        tcg_temp_free_i64(rd_64);
    }
    return true;
}

/* VPADDL, signed: sign-extending widen, pairwise add, no accumulate. */
static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

/* VPADDL, unsigned: zero-extending widen, pairwise add, no accumulate. */
static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

/*
 * VPADAL, signed: sign-extending widen, pairwise add, then accumulate
 * into the destination with the accfn selected by a->size.
 */
static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}

/*
 * VPADAL, unsigned: zero-extending widen, pairwise add, then accumulate
 * into the destination with the accfn selected by a->size.
 */
static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
{
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const opfn[] = {
        gen_helper_neon_paddl_u16,
        gen_helper_neon_paddl_u32,
        tcg_gen_add_i64,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
                             accfn[a->size]);
}

/* Zip/unzip helper signature: two register pointers (called as pd, pm). */
typedef void ZipFn(TCGv_ptr, TCGv_ptr);

/*
 * Common code for VUZP and VZIP: after the usual checks, call fn with
 * pointers to Vd and Vm.  A NULL fn makes the function return false.
 */
static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
                       ZipFn *fn)
{
    TCGv_ptr pd, pm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist.
*/
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!fn) {
        /* Bad size or size/q combination */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    fn(pd, pm);
    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(pm);
    return true;
}

/*
 * VUZP: the helper is chosen by [a->q][a->size].  Without q only sizes
 * 0 and 1 have a helper; with q sizes 0..2 do.
 */
static bool trans_VUZP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_unzip8,
            gen_helper_neon_unzip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qunzip8,
            gen_helper_neon_qunzip16,
            gen_helper_neon_qunzip32,
            NULL,
        }
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}

/*
 * VZIP: the helper is chosen by [a->q][a->size].  Without q only sizes
 * 0 and 1 have a helper; with q sizes 0..2 do.
 */
static bool trans_VZIP(DisasContext *s, arg_2misc *a)
{
    static ZipFn * const fn[2][4] = {
        {
            gen_helper_neon_zip8,
            gen_helper_neon_zip16,
            NULL,
            NULL,
        }, {
            gen_helper_neon_qzip8,
            gen_helper_neon_qzip16,
            gen_helper_neon_qzip32,
            NULL,
        }
    };
    return do_zip_uzp(s, a, fn[a->q][a->size]);
}

/*
 * Common code for the narrowing moves: each 64-bit half of Vm is passed
 * through narrowfn to give one 32-bit word of Vd.  A NULL narrowfn
 * makes the function return false.
 */
static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenNarrowEnvFn *narrowfn)
{
    TCGv_i64 rm;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist.
*/
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The source register number must be even. */
    if (a->vm & 1) {
        return false;
    }

    if (!narrowfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    /* Both halves are narrowed before either result word is written. */
    read_neon_element64(rm, a->vm, 0, MO_64);
    narrowfn(rd0, cpu_env, rm);
    read_neon_element64(rm, a->vm, 1, MO_64);
    narrowfn(rd1, cpu_env, rm);
    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);
    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rm);
    return true;
}

/*
 * Define trans_<INSN> for a narrowing move.  The narrowing function is
 * FUNC with an 8, 16 or 32 suffix, selected by a->size; size 3 is NULL.
 */
#define DO_VMOVN(INSN, FUNC) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
    { \
        static NeonGenNarrowEnvFn * const narrowfn[] = { \
            FUNC##8, \
            FUNC##16, \
            FUNC##32, \
            NULL, \
        }; \
        return do_vmovn(s, a, narrowfn[a->size]); \
    }

DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)

/*
 * VSHLL (2-reg-misc form): widen each 32-bit word of Vm with the
 * unsigned widening function for a->size and shift the 64-bit result
 * left by (8 << a->size) bits.
 */
static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 rm0, rm1;
    TCGv_i64 rd;
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist.
*/
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The destination register number must be even. */
    if (a->vd & 1) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd = tcg_temp_new_i64();
    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();

    /* Read both source words before the first write to the destination. */
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);

    widenfn(rd, rm0);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 0, MO_64);
    widenfn(rd, rm1);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 1, MO_64);

    tcg_temp_free_i64(rd);
    tcg_temp_free_i32(rm0);
    tcg_temp_free_i32(rm1);
    return true;
}

/*
 * VCVT, single precision to half precision: converts the four 32-bit
 * words of Vm and packs the results pairwise into the two 32-bit words
 * of Vd.  Requires the aa32_fp16_spconv feature.
 */
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist.
*/
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Vm must be an even register number and size must be 1. */
    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    /* Words 0 and 1 of Vm -> tmp2 (word 1 in the upper 16 bits). */
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = tcg_temp_new_i32();
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    read_neon_element32(tmp, a->vm, 2, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = tcg_temp_new_i32();
    /* Word 3 of Vm is read before the first write to Vd. */
    read_neon_element32(tmp3, a->vm, 3, MO_32);
    write_neon_element32(tmp2, a->vd, 0, MO_32);
    tcg_temp_free_i32(tmp2);
    /* Words 2 and 3 of Vm -> tmp3 (word 3 in the upper 16 bits). */
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    write_neon_element32(tmp3, a->vd, 1, MO_32);
    tcg_temp_free_i32(tmp3);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}

/*
 * VCVT, half precision to single precision: unpacks the four 16-bit
 * values held in words 0 and 1 of Vm and writes the four converted
 * 32-bit results to Vd.  Requires the aa32_fp16_spconv feature.
 */
static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist.
*/
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Vd must be an even register number and size must be 1. */
    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    /* Read both source words before the first write to Vd. */
    read_neon_element32(tmp, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    /* Low halfword of source word 0 -> Vd word 0. */
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 0, MO_32);
    /* High halfword of source word 0 -> Vd word 1. */
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    write_neon_element32(tmp, a->vd, 1, MO_32);
    tcg_temp_free_i32(tmp);
    /* Low halfword of source word 1 -> Vd word 2. */
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 2, MO_32);
    tcg_temp_free_i32(tmp3);
    /* High halfword of source word 1 -> Vd word 3. */
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    write_neon_element32(tmp2, a->vd, 3, MO_32);
    tcg_temp_free_i32(tmp2);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}

/*
 * Common code for 2-reg-misc operations that map onto a single gvec
 * expander: fn is called with the element size, the Vd and Vm register
 * offsets and the vector size (8 or 16 bytes) as both oprsz and maxsz.
 */
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist.
*/
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

    return true;
}

/* Define trans_<INSN> as a direct call to do_2misc_vec() with FN. */
#define DO_2MISC_VEC(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
    { \
        return do_2misc_vec(s, a, FN); \
    }

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)

/* VMVN: bitwise NOT; only size 0 is accepted. */
static bool trans_VMVN(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc_vec(s, a, tcg_gen_gvec_not);
}

/*
 * Wrap a 3-operand out-of-line helper in the GVecGen2Fn calling
 * convention used by do_2misc_vec().  rd_ofs is passed as both the
 * destination and the first source; vece is unused and DATA is passed
 * through as the helper's data word.
 */
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, \
                         uint32_t maxsz) \
    { \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \
                           DATA, FUNC); \
    }

/*
 * Wrap a 2-operand out-of-line helper in the GVecGen2Fn calling
 * convention.  vece is unused and DATA is passed through as the
 * helper's data word.
 */
#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, \
                         uint32_t maxsz) \
    { \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \
    }

/*
 * AESE/AESD share one helper and differ only in the data word (0 or 1);
 * the same holds for AESMC/AESIMC.
 */
WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)

/*
 * Define trans_<INSN> for a crypto insn: it is rejected unless FEATURE
 * is present and a->size equals SIZE, otherwise gen_<INSN> is run
 * through do_2misc_vec().
 */
#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
    { \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \
            return false; \
        } \
        return do_2misc_vec(s, a, gen_##INSN); \
    }

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)

/*
 * Common code for 2-reg-misc operations applied independently to each
 * 32-bit word: fn maps one word of Vm to one word of Vd.  A NULL fn
 * makes the function return false.
 */
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    TCGv_i32 tmp;
    int pass;

    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    for (pass = 0; pass < (a->q ?
4 : 2); pass++) { 3560 read_neon_element32(tmp, a->vm, pass, MO_32); 3561 fn(tmp, tmp); 3562 write_neon_element32(tmp, a->vd, pass, MO_32); 3563 } 3564 tcg_temp_free_i32(tmp); 3565 3566 return true; 3567} 3568 3569static bool trans_VREV32(DisasContext *s, arg_2misc *a) 3570{ 3571 static NeonGenOneOpFn * const fn[] = { 3572 tcg_gen_bswap32_i32, 3573 gen_swap_half, 3574 NULL, 3575 NULL, 3576 }; 3577 return do_2misc(s, a, fn[a->size]); 3578} 3579 3580static bool trans_VREV16(DisasContext *s, arg_2misc *a) 3581{ 3582 if (a->size != 0) { 3583 return false; 3584 } 3585 return do_2misc(s, a, gen_rev16); 3586} 3587 3588static bool trans_VCLS(DisasContext *s, arg_2misc *a) 3589{ 3590 static NeonGenOneOpFn * const fn[] = { 3591 gen_helper_neon_cls_s8, 3592 gen_helper_neon_cls_s16, 3593 gen_helper_neon_cls_s32, 3594 NULL, 3595 }; 3596 return do_2misc(s, a, fn[a->size]); 3597} 3598 3599static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm) 3600{ 3601 tcg_gen_clzi_i32(rd, rm, 32); 3602} 3603 3604static bool trans_VCLZ(DisasContext *s, arg_2misc *a) 3605{ 3606 static NeonGenOneOpFn * const fn[] = { 3607 gen_helper_neon_clz_u8, 3608 gen_helper_neon_clz_u16, 3609 do_VCLZ_32, 3610 NULL, 3611 }; 3612 return do_2misc(s, a, fn[a->size]); 3613} 3614 3615static bool trans_VCNT(DisasContext *s, arg_2misc *a) 3616{ 3617 if (a->size != 0) { 3618 return false; 3619 } 3620 return do_2misc(s, a, gen_helper_neon_cnt_u8); 3621} 3622 3623static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3624 uint32_t oprsz, uint32_t maxsz) 3625{ 3626 tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs, 3627 vece == MO_16 ? 
0x7fff : 0x7fffffff, 3628 oprsz, maxsz); 3629} 3630 3631static bool trans_VABS_F(DisasContext *s, arg_2misc *a) 3632{ 3633 if (a->size == MO_16) { 3634 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3635 return false; 3636 } 3637 } else if (a->size != MO_32) { 3638 return false; 3639 } 3640 return do_2misc_vec(s, a, gen_VABS_F); 3641} 3642 3643static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, 3644 uint32_t oprsz, uint32_t maxsz) 3645{ 3646 tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs, 3647 vece == MO_16 ? 0x8000 : 0x80000000, 3648 oprsz, maxsz); 3649} 3650 3651static bool trans_VNEG_F(DisasContext *s, arg_2misc *a) 3652{ 3653 if (a->size == MO_16) { 3654 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3655 return false; 3656 } 3657 } else if (a->size != MO_32) { 3658 return false; 3659 } 3660 return do_2misc_vec(s, a, gen_VNEG_F); 3661} 3662 3663static bool trans_VRECPE(DisasContext *s, arg_2misc *a) 3664{ 3665 if (a->size != 2) { 3666 return false; 3667 } 3668 return do_2misc(s, a, gen_helper_recpe_u32); 3669} 3670 3671static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) 3672{ 3673 if (a->size != 2) { 3674 return false; 3675 } 3676 return do_2misc(s, a, gen_helper_rsqrte_u32); 3677} 3678 3679#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ 3680 static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \ 3681 { \ 3682 FUNC(d, cpu_env, m); \ 3683 } 3684 3685WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) 3686WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16) 3687WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32) 3688WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8) 3689WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16) 3690WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32) 3691 3692static bool trans_VQABS(DisasContext *s, arg_2misc *a) 3693{ 3694 static NeonGenOneOpFn * const fn[] = { 3695 gen_VQABS_s8, 3696 gen_VQABS_s16, 3697 gen_VQABS_s32, 3698 NULL, 3699 }; 3700 return do_2misc(s, a, fn[a->size]); 3701} 3702 3703static bool 
trans_VQNEG(DisasContext *s, arg_2misc *a) 3704{ 3705 static NeonGenOneOpFn * const fn[] = { 3706 gen_VQNEG_s8, 3707 gen_VQNEG_s16, 3708 gen_VQNEG_s32, 3709 NULL, 3710 }; 3711 return do_2misc(s, a, fn[a->size]); 3712} 3713 3714#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \ 3715 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3716 uint32_t rm_ofs, \ 3717 uint32_t oprsz, uint32_t maxsz) \ 3718 { \ 3719 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3720 NULL, HFUNC, SFUNC, NULL, \ 3721 }; \ 3722 TCGv_ptr fpst; \ 3723 fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \ 3724 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \ 3725 fns[vece]); \ 3726 tcg_temp_free_ptr(fpst); \ 3727 } \ 3728 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3729 { \ 3730 if (a->size == MO_16) { \ 3731 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3732 return false; \ 3733 } \ 3734 } else if (a->size != MO_32) { \ 3735 return false; \ 3736 } \ 3737 return do_2misc_vec(s, a, gen_##INSN); \ 3738 } 3739 3740DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s) 3741DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s) 3742DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s) 3743DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s) 3744DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s) 3745DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s) 3746DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s) 3747DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos) 3748DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos) 3749DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs) 3750DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs) 3751 3752DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s) 3753 
3754static bool trans_VRINTX(DisasContext *s, arg_2misc *a) 3755{ 3756 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { 3757 return false; 3758 } 3759 return trans_VRINTX_impl(s, a); 3760} 3761 3762#define DO_VEC_RMODE(INSN, RMODE, OP) \ 3763 static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \ 3764 uint32_t rm_ofs, \ 3765 uint32_t oprsz, uint32_t maxsz) \ 3766 { \ 3767 static gen_helper_gvec_2_ptr * const fns[4] = { \ 3768 NULL, \ 3769 gen_helper_gvec_##OP##h, \ 3770 gen_helper_gvec_##OP##s, \ 3771 NULL, \ 3772 }; \ 3773 TCGv_ptr fpst; \ 3774 fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \ 3775 tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \ 3776 arm_rmode_to_sf(RMODE), fns[vece]); \ 3777 tcg_temp_free_ptr(fpst); \ 3778 } \ 3779 static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ 3780 { \ 3781 if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \ 3782 return false; \ 3783 } \ 3784 if (a->size == MO_16) { \ 3785 if (!dc_isar_feature(aa32_fp16_arith, s)) { \ 3786 return false; \ 3787 } \ 3788 } else if (a->size != MO_32) { \ 3789 return false; \ 3790 } \ 3791 return do_2misc_vec(s, a, gen_##INSN); \ 3792 } 3793 3794DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u) 3795DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s) 3796DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u) 3797DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s) 3798DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u) 3799DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s) 3800DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u) 3801DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s) 3802 3803DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_) 3804DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_) 3805DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_) 3806DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_) 3807DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_) 3808 3809static bool trans_VSWP(DisasContext *s, arg_2misc *a) 3810{ 3811 TCGv_i64 rm, rd; 3812 int pass; 3813 3814 if 
(!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3815 return false; 3816 } 3817 3818 /* UNDEF accesses to D16-D31 if they don't exist. */ 3819 if (!dc_isar_feature(aa32_simd_r32, s) && 3820 ((a->vd | a->vm) & 0x10)) { 3821 return false; 3822 } 3823 3824 if (a->size != 0) { 3825 return false; 3826 } 3827 3828 if ((a->vd | a->vm) & a->q) { 3829 return false; 3830 } 3831 3832 if (!vfp_access_check(s)) { 3833 return true; 3834 } 3835 3836 rm = tcg_temp_new_i64(); 3837 rd = tcg_temp_new_i64(); 3838 for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 3839 read_neon_element64(rm, a->vm, pass, MO_64); 3840 read_neon_element64(rd, a->vd, pass, MO_64); 3841 write_neon_element64(rm, a->vd, pass, MO_64); 3842 write_neon_element64(rd, a->vm, pass, MO_64); 3843 } 3844 tcg_temp_free_i64(rm); 3845 tcg_temp_free_i64(rd); 3846 3847 return true; 3848} 3849static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) 3850{ 3851 TCGv_i32 rd, tmp; 3852 3853 rd = tcg_temp_new_i32(); 3854 tmp = tcg_temp_new_i32(); 3855 3856 tcg_gen_shli_i32(rd, t0, 8); 3857 tcg_gen_andi_i32(rd, rd, 0xff00ff00); 3858 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff); 3859 tcg_gen_or_i32(rd, rd, tmp); 3860 3861 tcg_gen_shri_i32(t1, t1, 8); 3862 tcg_gen_andi_i32(t1, t1, 0x00ff00ff); 3863 tcg_gen_andi_i32(tmp, t0, 0xff00ff00); 3864 tcg_gen_or_i32(t1, t1, tmp); 3865 tcg_gen_mov_i32(t0, rd); 3866 3867 tcg_temp_free_i32(tmp); 3868 tcg_temp_free_i32(rd); 3869} 3870 3871static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) 3872{ 3873 TCGv_i32 rd, tmp; 3874 3875 rd = tcg_temp_new_i32(); 3876 tmp = tcg_temp_new_i32(); 3877 3878 tcg_gen_shli_i32(rd, t0, 16); 3879 tcg_gen_andi_i32(tmp, t1, 0xffff); 3880 tcg_gen_or_i32(rd, rd, tmp); 3881 tcg_gen_shri_i32(t1, t1, 16); 3882 tcg_gen_andi_i32(tmp, t0, 0xffff0000); 3883 tcg_gen_or_i32(t1, t1, tmp); 3884 tcg_gen_mov_i32(t0, rd); 3885 3886 tcg_temp_free_i32(tmp); 3887 tcg_temp_free_i32(rd); 3888} 3889 3890static bool trans_VTRN(DisasContext *s, arg_2misc *a) 3891{ 3892 TCGv_i32 tmp, tmp2; 3893 int pass; 
3894 3895 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3896 return false; 3897 } 3898 3899 /* UNDEF accesses to D16-D31 if they don't exist. */ 3900 if (!dc_isar_feature(aa32_simd_r32, s) && 3901 ((a->vd | a->vm) & 0x10)) { 3902 return false; 3903 } 3904 3905 if ((a->vd | a->vm) & a->q) { 3906 return false; 3907 } 3908 3909 if (a->size == 3) { 3910 return false; 3911 } 3912 3913 if (!vfp_access_check(s)) { 3914 return true; 3915 } 3916 3917 tmp = tcg_temp_new_i32(); 3918 tmp2 = tcg_temp_new_i32(); 3919 if (a->size == MO_32) { 3920 for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) { 3921 read_neon_element32(tmp, a->vm, pass, MO_32); 3922 read_neon_element32(tmp2, a->vd, pass + 1, MO_32); 3923 write_neon_element32(tmp2, a->vm, pass, MO_32); 3924 write_neon_element32(tmp, a->vd, pass + 1, MO_32); 3925 } 3926 } else { 3927 for (pass = 0; pass < (a->q ? 4 : 2); pass++) { 3928 read_neon_element32(tmp, a->vm, pass, MO_32); 3929 read_neon_element32(tmp2, a->vd, pass, MO_32); 3930 if (a->size == MO_8) { 3931 gen_neon_trn_u8(tmp, tmp2); 3932 } else { 3933 gen_neon_trn_u16(tmp, tmp2); 3934 } 3935 write_neon_element32(tmp2, a->vm, pass, MO_32); 3936 write_neon_element32(tmp, a->vd, pass, MO_32); 3937 } 3938 } 3939 tcg_temp_free_i32(tmp); 3940 tcg_temp_free_i32(tmp2); 3941 return true; 3942} 3943