/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/log.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Invoke an out-of-line helper on 2 Zregs.
*/ 102 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, 103 int rd, int rn, int data) 104 { 105 if (fn == NULL) { 106 return false; 107 } 108 if (sve_access_check(s)) { 109 unsigned vsz = vec_full_reg_size(s); 110 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 111 vec_full_reg_offset(s, rn), 112 vsz, vsz, data, fn); 113 } 114 return true; 115 } 116 117 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 118 int rd, int rn, int data, 119 ARMFPStatusFlavour flavour) 120 { 121 if (fn == NULL) { 122 return false; 123 } 124 if (sve_access_check(s)) { 125 unsigned vsz = vec_full_reg_size(s); 126 TCGv_ptr status = fpstatus_ptr(flavour); 127 128 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 129 vec_full_reg_offset(s, rn), 130 status, vsz, vsz, data, fn); 131 } 132 return true; 133 } 134 135 static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, 136 arg_rr_esz *a, int data) 137 { 138 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, 139 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 140 } 141 142 /* Invoke an out-of-line helper on 3 Zregs. */ 143 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 144 int rd, int rn, int rm, int data) 145 { 146 if (fn == NULL) { 147 return false; 148 } 149 if (sve_access_check(s)) { 150 unsigned vsz = vec_full_reg_size(s); 151 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 152 vec_full_reg_offset(s, rn), 153 vec_full_reg_offset(s, rm), 154 vsz, vsz, data, fn); 155 } 156 return true; 157 } 158 159 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, 160 arg_rrr_esz *a, int data) 161 { 162 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 163 } 164 165 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ 166 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 167 int rd, int rn, int rm, 168 int data, ARMFPStatusFlavour flavour) 169 { 170 if (fn == NULL) { 171 return false; 172 } 173 if (sve_access_check(s)) { 174 unsigned vsz = vec_full_reg_size(s); 175 TCGv_ptr status = fpstatus_ptr(flavour); 176 177 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 178 vec_full_reg_offset(s, rn), 179 vec_full_reg_offset(s, rm), 180 status, vsz, vsz, data, fn); 181 } 182 return true; 183 } 184 185 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 186 arg_rrr_esz *a, int data) 187 { 188 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, 189 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 190 } 191 192 /* Invoke an out-of-line helper on 4 Zregs. */ 193 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 194 int rd, int rn, int rm, int ra, int data) 195 { 196 if (fn == NULL) { 197 return false; 198 } 199 if (sve_access_check(s)) { 200 unsigned vsz = vec_full_reg_size(s); 201 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 202 vec_full_reg_offset(s, rn), 203 vec_full_reg_offset(s, rm), 204 vec_full_reg_offset(s, ra), 205 vsz, vsz, data, fn); 206 } 207 return true; 208 } 209 210 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, 211 arg_rrrr_esz *a, int data) 212 { 213 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 214 } 215 216 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, 217 arg_rrxr_esz *a) 218 { 219 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 220 } 221 222 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. 
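 * The pointer is forwarded to the helper unchanged; gen_gvec_fpst_zzzz
 * below uses it to pass a float_status pointer.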
*/ 223 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 224 int rd, int rn, int rm, int ra, 225 int data, TCGv_ptr ptr) 226 { 227 if (fn == NULL) { 228 return false; 229 } 230 if (sve_access_check(s)) { 231 unsigned vsz = vec_full_reg_size(s); 232 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 233 vec_full_reg_offset(s, rn), 234 vec_full_reg_offset(s, rm), 235 vec_full_reg_offset(s, ra), 236 ptr, vsz, vsz, data, fn); 237 } 238 return true; 239 } 240 241 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 242 int rd, int rn, int rm, int ra, 243 int data, ARMFPStatusFlavour flavour) 244 { 245 TCGv_ptr status = fpstatus_ptr(flavour); 246 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); 247 return ret; 248 } 249 250 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */ 251 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, 252 int rd, int rn, int rm, int ra, int pg, 253 int data, ARMFPStatusFlavour flavour) 254 { 255 if (fn == NULL) { 256 return false; 257 } 258 if (sve_access_check(s)) { 259 unsigned vsz = vec_full_reg_size(s); 260 TCGv_ptr status = fpstatus_ptr(flavour); 261 262 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd), 263 vec_full_reg_offset(s, rn), 264 vec_full_reg_offset(s, rm), 265 vec_full_reg_offset(s, ra), 266 pred_full_reg_offset(s, pg), 267 status, vsz, vsz, data, fn); 268 } 269 return true; 270 } 271 272 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */ 273 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, 274 int rd, int rn, int pg, int data) 275 { 276 if (fn == NULL) { 277 return false; 278 } 279 if (sve_access_check(s)) { 280 unsigned vsz = vec_full_reg_size(s); 281 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 282 vec_full_reg_offset(s, rn), 283 pred_full_reg_offset(s, pg), 284 vsz, vsz, data, fn); 285 } 286 return true; 287 } 288 289 static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn, 290 arg_rpr_esz *a, int data) 291 { 292 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data); 293 } 294 295 static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn, 296 arg_rpri_esz *a) 297 { 298 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); 299 } 300 301 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn, 302 int rd, int rn, int pg, int data, 303 ARMFPStatusFlavour flavour) 304 { 305 if (fn == NULL) { 306 return false; 307 } 308 if (sve_access_check(s)) { 309 unsigned vsz = vec_full_reg_size(s); 310 TCGv_ptr status = fpstatus_ptr(flavour); 311 312 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 313 vec_full_reg_offset(s, rn), 314 pred_full_reg_offset(s, pg), 315 status, vsz, vsz, data, fn); 316 } 317 return true; 318 } 319 320 static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn, 321 arg_rpr_esz *a, int data, 322 ARMFPStatusFlavour flavour) 323 { 324 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour); 325 } 326 327 /* Invoke an out-of-line helper on 3 Zregs and a predicate. 
*/ 328 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, 329 int rd, int rn, int rm, int pg, int data) 330 { 331 if (fn == NULL) { 332 return false; 333 } 334 if (sve_access_check(s)) { 335 unsigned vsz = vec_full_reg_size(s); 336 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 337 vec_full_reg_offset(s, rn), 338 vec_full_reg_offset(s, rm), 339 pred_full_reg_offset(s, pg), 340 vsz, vsz, data, fn); 341 } 342 return true; 343 } 344 345 static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn, 346 arg_rprr_esz *a, int data) 347 { 348 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data); 349 } 350 351 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */ 352 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, 353 int rd, int rn, int rm, int pg, int data, 354 ARMFPStatusFlavour flavour) 355 { 356 if (fn == NULL) { 357 return false; 358 } 359 if (sve_access_check(s)) { 360 unsigned vsz = vec_full_reg_size(s); 361 TCGv_ptr status = fpstatus_ptr(flavour); 362 363 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 364 vec_full_reg_offset(s, rn), 365 vec_full_reg_offset(s, rm), 366 pred_full_reg_offset(s, pg), 367 status, vsz, vsz, data, fn); 368 } 369 return true; 370 } 371 372 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, 373 arg_rprr_esz *a) 374 { 375 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, 376 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 377 } 378 379 /* Invoke a vector expander on two Zregs and an immediate. */ 380 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 381 int esz, int rd, int rn, uint64_t imm) 382 { 383 if (gvec_fn == NULL) { 384 return false; 385 } 386 if (sve_access_check(s)) { 387 unsigned vsz = vec_full_reg_size(s); 388 gvec_fn(esz, vec_full_reg_offset(s, rd), 389 vec_full_reg_offset(s, rn), imm, vsz, vsz); 390 } 391 return true; 392 } 393 394 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, 395 arg_rri_esz *a) 396 { 397 if (a->esz < 0) { 398 /* Invalid tsz encoding -- see tszimm_esz. */ 399 return false; 400 } 401 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); 402 } 403 404 /* Invoke a vector expander on three Zregs. */ 405 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, 406 int esz, int rd, int rn, int rm) 407 { 408 if (gvec_fn == NULL) { 409 return false; 410 } 411 if (sve_access_check(s)) { 412 unsigned vsz = vec_full_reg_size(s); 413 gvec_fn(esz, vec_full_reg_offset(s, rd), 414 vec_full_reg_offset(s, rn), 415 vec_full_reg_offset(s, rm), vsz, vsz); 416 } 417 return true; 418 } 419 420 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, 421 arg_rrr_esz *a) 422 { 423 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 424 } 425 426 /* Invoke a vector expander on four Zregs. */ 427 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, 428 arg_rrrr_esz *a) 429 { 430 if (gvec_fn == NULL) { 431 return false; 432 } 433 if (sve_access_check(s)) { 434 unsigned vsz = vec_full_reg_size(s); 435 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 436 vec_full_reg_offset(s, a->rn), 437 vec_full_reg_offset(s, a->rm), 438 vec_full_reg_offset(s, a->ra), vsz, vsz); 439 } 440 return true; 441 } 442 443 /* Invoke a vector move on two Zregs. 
 */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));

    do_pred_flags(t);
}

/* For each element size, the bits within a predicate word that are active.
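 * One predicate bit corresponds to each byte of the vector: for MO_8
 * every bit is significant, for MO_16 every second bit (0x5555...),
 * for MO_32 every fourth (0x1111...) and for MO_64 every eighth
 * (0x0101...).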
*/ 518 const uint64_t pred_esz_masks[5] = { 519 0xffffffffffffffffull, 0x5555555555555555ull, 520 0x1111111111111111ull, 0x0101010101010101ull, 521 0x0001000100010001ull, 522 }; 523 524 static bool trans_INVALID(DisasContext *s, arg_INVALID *a) 525 { 526 unallocated_encoding(s); 527 return true; 528 } 529 530 /* 531 *** SVE Logical - Unpredicated Group 532 */ 533 534 TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a) 535 TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) 536 TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) 537 TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) 538 539 static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 540 { 541 TCGv_i64 t = tcg_temp_new_i64(); 542 uint64_t mask = dup_const(MO_8, 0xff >> sh); 543 544 tcg_gen_xor_i64(t, n, m); 545 tcg_gen_shri_i64(d, t, sh); 546 tcg_gen_shli_i64(t, t, 8 - sh); 547 tcg_gen_andi_i64(d, d, mask); 548 tcg_gen_andi_i64(t, t, ~mask); 549 tcg_gen_or_i64(d, d, t); 550 } 551 552 static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 553 { 554 TCGv_i64 t = tcg_temp_new_i64(); 555 uint64_t mask = dup_const(MO_16, 0xffff >> sh); 556 557 tcg_gen_xor_i64(t, n, m); 558 tcg_gen_shri_i64(d, t, sh); 559 tcg_gen_shli_i64(t, t, 16 - sh); 560 tcg_gen_andi_i64(d, d, mask); 561 tcg_gen_andi_i64(t, t, ~mask); 562 tcg_gen_or_i64(d, d, t); 563 } 564 565 static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) 566 { 567 tcg_gen_xor_i32(d, n, m); 568 tcg_gen_rotri_i32(d, d, sh); 569 } 570 571 static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) 572 { 573 tcg_gen_xor_i64(d, n, m); 574 tcg_gen_rotri_i64(d, d, sh); 575 } 576 577 static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 578 TCGv_vec m, int64_t sh) 579 { 580 tcg_gen_xor_vec(vece, d, n, m); 581 tcg_gen_rotri_vec(vece, d, d, sh); 582 } 583 584 void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 585 uint32_t rm_ofs, int64_t shift, 586 uint32_t opr_sz, uint32_t max_sz) 587 { 588 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; 589 static const GVecGen3i ops[4] = { 590 { .fni8 = gen_xar8_i64, 591 .fniv = gen_xar_vec, 592 .fno = gen_helper_sve2_xar_b, 593 .opt_opc = vecop, 594 .vece = MO_8 }, 595 { .fni8 = gen_xar16_i64, 596 .fniv = gen_xar_vec, 597 .fno = gen_helper_sve2_xar_h, 598 .opt_opc = vecop, 599 .vece = MO_16 }, 600 { .fni4 = gen_xar_i32, 601 .fniv = gen_xar_vec, 602 .fno = gen_helper_sve2_xar_s, 603 .opt_opc = vecop, 604 .vece = MO_32 }, 605 { .fni8 = gen_xar_i64, 606 .fniv = gen_xar_vec, 607 .fno = gen_helper_gvec_xar_d, 608 .opt_opc = vecop, 609 .vece = MO_64 } 610 }; 611 int esize = 8 << vece; 612 613 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ 614 tcg_debug_assert(shift >= 0); 615 tcg_debug_assert(shift <= esize); 616 shift &= esize - 1; 617 618 if (shift == 0) { 619 /* xar with no rotate devolves to xor. 
*/ 620 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); 621 } else { 622 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 623 shift, &ops[vece]); 624 } 625 } 626 627 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) 628 { 629 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { 630 return false; 631 } 632 if (sve_access_check(s)) { 633 unsigned vsz = vec_full_reg_size(s); 634 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd), 635 vec_full_reg_offset(s, a->rn), 636 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz); 637 } 638 return true; 639 } 640 641 static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 642 { 643 tcg_gen_xor_i64(d, n, m); 644 tcg_gen_xor_i64(d, d, k); 645 } 646 647 static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 648 TCGv_vec m, TCGv_vec k) 649 { 650 tcg_gen_xor_vec(vece, d, n, m); 651 tcg_gen_xor_vec(vece, d, d, k); 652 } 653 654 static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 655 uint32_t a, uint32_t oprsz, uint32_t maxsz) 656 { 657 static const GVecGen4 op = { 658 .fni8 = gen_eor3_i64, 659 .fniv = gen_eor3_vec, 660 .fno = gen_helper_sve2_eor3, 661 .vece = MO_64, 662 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 663 }; 664 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 665 } 666 667 TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a) 668 669 static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 670 { 671 tcg_gen_andc_i64(d, m, k); 672 tcg_gen_xor_i64(d, d, n); 673 } 674 675 static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 676 TCGv_vec m, TCGv_vec k) 677 { 678 tcg_gen_andc_vec(vece, d, m, k); 679 tcg_gen_xor_vec(vece, d, d, n); 680 } 681 682 static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 683 uint32_t a, uint32_t oprsz, uint32_t maxsz) 684 { 685 static const GVecGen4 op = { 686 .fni8 = gen_bcax_i64, 687 .fniv = gen_bcax_vec, 688 .fno = gen_helper_sve2_bcax, 689 .vece = MO_64, 690 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 691 }; 692 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 693 } 694 695 TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a) 696 697 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 698 uint32_t a, uint32_t oprsz, uint32_t maxsz) 699 { 700 /* BSL differs from the generic bitsel in argument ordering. 
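     * tcg_gen_gvec_bitsel takes the selector as its first vector
     * operand, so 'a' here supplies the mask that chooses between the
     * bits of 'n' and 'm'.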
*/ 701 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz); 702 } 703 704 TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a) 705 706 static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 707 { 708 tcg_gen_andc_i64(n, k, n); 709 tcg_gen_andc_i64(m, m, k); 710 tcg_gen_or_i64(d, n, m); 711 } 712 713 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 714 TCGv_vec m, TCGv_vec k) 715 { 716 if (TCG_TARGET_HAS_bitsel_vec) { 717 tcg_gen_not_vec(vece, n, n); 718 tcg_gen_bitsel_vec(vece, d, k, n, m); 719 } else { 720 tcg_gen_andc_vec(vece, n, k, n); 721 tcg_gen_andc_vec(vece, m, m, k); 722 tcg_gen_or_vec(vece, d, n, m); 723 } 724 } 725 726 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 727 uint32_t a, uint32_t oprsz, uint32_t maxsz) 728 { 729 static const GVecGen4 op = { 730 .fni8 = gen_bsl1n_i64, 731 .fniv = gen_bsl1n_vec, 732 .fno = gen_helper_sve2_bsl1n, 733 .vece = MO_64, 734 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 735 }; 736 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 737 } 738 739 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a) 740 741 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 742 { 743 /* 744 * Z[dn] = (n & k) | (~m & ~k) 745 * = | ~(m | k) 746 */ 747 tcg_gen_and_i64(n, n, k); 748 if (TCG_TARGET_HAS_orc_i64) { 749 tcg_gen_or_i64(m, m, k); 750 tcg_gen_orc_i64(d, n, m); 751 } else { 752 tcg_gen_nor_i64(m, m, k); 753 tcg_gen_or_i64(d, n, m); 754 } 755 } 756 757 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 758 TCGv_vec m, TCGv_vec k) 759 { 760 if (TCG_TARGET_HAS_bitsel_vec) { 761 tcg_gen_not_vec(vece, m, m); 762 tcg_gen_bitsel_vec(vece, d, k, n, m); 763 } else { 764 tcg_gen_and_vec(vece, n, n, k); 765 tcg_gen_or_vec(vece, m, m, k); 766 tcg_gen_orc_vec(vece, d, n, m); 767 } 768 } 769 770 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 771 uint32_t a, uint32_t oprsz, uint32_t maxsz) 772 { 773 static const GVecGen4 op = { 774 .fni8 = gen_bsl2n_i64, 775 .fniv = gen_bsl2n_vec, 776 .fno = gen_helper_sve2_bsl2n, 777 .vece = MO_64, 778 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 779 }; 780 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 781 } 782 783 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a) 784 785 static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) 786 { 787 tcg_gen_and_i64(n, n, k); 788 tcg_gen_andc_i64(m, m, k); 789 tcg_gen_nor_i64(d, n, m); 790 } 791 792 static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n, 793 TCGv_vec m, TCGv_vec k) 794 { 795 tcg_gen_bitsel_vec(vece, d, k, n, m); 796 tcg_gen_not_vec(vece, d, d); 797 } 798 799 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, 800 uint32_t a, uint32_t oprsz, uint32_t maxsz) 801 { 802 static const GVecGen4 op = { 803 .fni8 = gen_nbsl_i64, 804 .fniv = gen_nbsl_vec, 805 .fno = gen_helper_sve2_nbsl, 806 .vece = MO_64, 807 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 808 }; 809 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); 810 } 811 812 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a) 813 814 /* 815 *** SVE Integer Arithmetic - Unpredicated Group 816 */ 817 818 TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a) 819 TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a) 820 TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a) 821 TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a) 822 TRANS_FEAT(UQADD_zzz, aa64_sve, 
           gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {     \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
    };                                                          \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,               \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = { \
        gen_helper_##name##_b, gen_helper_##name##_h,  \
        gen_helper_##name##_s, gen_helper_##name##_d,  \
    };                                                 \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB,
aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0) 921 922 static gen_helper_gvec_3 * const uxtb_fns[4] = { 923 NULL, gen_helper_sve_uxtb_h, 924 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d, 925 }; 926 TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0) 927 928 static gen_helper_gvec_3 * const sxth_fns[4] = { 929 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d 930 }; 931 TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0) 932 933 static gen_helper_gvec_3 * const uxth_fns[4] = { 934 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d 935 }; 936 TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0) 937 938 TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, 939 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0) 940 TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, 941 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0) 942 943 /* 944 *** SVE Integer Reduction Group 945 */ 946 947 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32); 948 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a, 949 gen_helper_gvec_reduc *fn) 950 { 951 unsigned vsz = vec_full_reg_size(s); 952 TCGv_ptr t_zn, t_pg; 953 TCGv_i32 desc; 954 TCGv_i64 temp; 955 956 if (fn == NULL) { 957 return false; 958 } 959 if (!sve_access_check(s)) { 960 return true; 961 } 962 963 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 964 temp = tcg_temp_new_i64(); 965 t_zn = tcg_temp_new_ptr(); 966 t_pg = tcg_temp_new_ptr(); 967 968 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 969 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 970 fn(temp, t_zn, t_pg, desc); 971 972 write_fp_dreg(s, a->rd, temp); 973 return true; 974 } 975 976 #define DO_VPZ(NAME, name) \ 977 static gen_helper_gvec_reduc * const name##_fns[4] = { \ 978 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ 979 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 980 }; \ 981 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz]) 982 983 DO_VPZ(ORV, orv) 984 DO_VPZ(ANDV, andv) 985 DO_VPZ(EORV, eorv) 986 987 DO_VPZ(UADDV, uaddv) 988 DO_VPZ(SMAXV, smaxv) 989 DO_VPZ(UMAXV, umaxv) 990 DO_VPZ(SMINV, sminv) 991 DO_VPZ(UMINV, uminv) 992 993 static gen_helper_gvec_reduc * const saddv_fns[4] = { 994 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, 995 gen_helper_sve_saddv_s, NULL 996 }; 997 TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz]) 998 999 #undef DO_VPZ 1000 1001 /* 1002 *** SVE Shift by Immediate - Predicated Group 1003 */ 1004 1005 /* 1006 * Copy Zn into Zd, storing zeros into inactive elements. 1007 * If invert, store zeros into the active elements. 1008 */ 1009 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, 1010 int esz, bool invert) 1011 { 1012 static gen_helper_gvec_3 * const fns[4] = { 1013 gen_helper_sve_movz_b, gen_helper_sve_movz_h, 1014 gen_helper_sve_movz_s, gen_helper_sve_movz_d, 1015 }; 1016 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); 1017 } 1018 1019 static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr, 1020 gen_helper_gvec_3 * const fns[4]) 1021 { 1022 int max; 1023 1024 if (a->esz < 0) { 1025 /* Invalid tsz encoding -- see tszimm_esz. */ 1026 return false; 1027 } 1028 1029 /* 1030 * Shift by element size is architecturally valid. 1031 * For arithmetic right-shift, it's the same as by one less. 1032 * For logical shifts and ASRD, it is a zeroing operation. 
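     * E.g. LSR Zd.B, Pg/M, Zd.B, #8 zeroes every active byte element;
     * that case is handled below by reusing do_movz_zpz with invert set.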
1033 */ 1034 max = 8 << a->esz; 1035 if (a->imm >= max) { 1036 if (asr) { 1037 a->imm = max - 1; 1038 } else { 1039 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); 1040 } 1041 } 1042 return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a); 1043 } 1044 1045 static gen_helper_gvec_3 * const asr_zpzi_fns[4] = { 1046 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, 1047 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, 1048 }; 1049 TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns) 1050 1051 static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = { 1052 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, 1053 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, 1054 }; 1055 TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns) 1056 1057 static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = { 1058 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, 1059 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, 1060 }; 1061 TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns) 1062 1063 static gen_helper_gvec_3 * const asrd_fns[4] = { 1064 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, 1065 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, 1066 }; 1067 TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns) 1068 1069 static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = { 1070 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, 1071 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, 1072 }; 1073 TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1074 a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a) 1075 1076 static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = { 1077 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, 1078 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, 1079 }; 1080 TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, 1081 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a) 1082 1083 static gen_helper_gvec_3 * const srshr_fns[4] = { 1084 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, 1085 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, 1086 }; 1087 TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1088 a->esz < 0 ? NULL : srshr_fns[a->esz], a) 1089 1090 static gen_helper_gvec_3 * const urshr_fns[4] = { 1091 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, 1092 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, 1093 }; 1094 TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, 1095 a->esz < 0 ? NULL : urshr_fns[a->esz], a) 1096 1097 static gen_helper_gvec_3 * const sqshlu_fns[4] = { 1098 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, 1099 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, 1100 }; 1101 TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi, 1102 a->esz < 0 ? NULL : sqshlu_fns[a->esz], a) 1103 1104 /* 1105 *** SVE Bitwise Shift - Predicated Group 1106 */ 1107 1108 #define DO_ZPZW(NAME, name) \ 1109 static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ 1110 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ 1111 gen_helper_sve_##name##_zpzw_s, NULL \ 1112 }; \ 1113 TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ 1114 a->esz < 0 ? 
NULL : name##_zpzw_fns[a->esz], a, 0) 1115 1116 DO_ZPZW(ASR, asr) 1117 DO_ZPZW(LSR, lsr) 1118 DO_ZPZW(LSL, lsl) 1119 1120 #undef DO_ZPZW 1121 1122 /* 1123 *** SVE Bitwise Shift - Unpredicated Group 1124 */ 1125 1126 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, 1127 void (*gvec_fn)(unsigned, uint32_t, uint32_t, 1128 int64_t, uint32_t, uint32_t)) 1129 { 1130 if (a->esz < 0) { 1131 /* Invalid tsz encoding -- see tszimm_esz. */ 1132 return false; 1133 } 1134 if (sve_access_check(s)) { 1135 unsigned vsz = vec_full_reg_size(s); 1136 /* Shift by element size is architecturally valid. For 1137 arithmetic right-shift, it's the same as by one less. 1138 Otherwise it is a zeroing operation. */ 1139 if (a->imm >= 8 << a->esz) { 1140 if (asr) { 1141 a->imm = (8 << a->esz) - 1; 1142 } else { 1143 do_dupi_z(s, a->rd, 0); 1144 return true; 1145 } 1146 } 1147 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 1148 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 1149 } 1150 return true; 1151 } 1152 1153 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) 1154 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) 1155 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) 1156 1157 #define DO_ZZW(NAME, name) \ 1158 static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ 1159 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ 1160 gen_helper_sve_##name##_zzw_s, NULL \ 1161 }; \ 1162 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ 1163 name##_zzw_fns[a->esz], a, 0) 1164 1165 DO_ZZW(ASR_zzw, asr) 1166 DO_ZZW(LSR_zzw, lsr) 1167 DO_ZZW(LSL_zzw, lsl) 1168 1169 #undef DO_ZZW 1170 1171 /* 1172 *** SVE Integer Multiply-Add Group 1173 */ 1174 1175 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, 1176 gen_helper_gvec_5 *fn) 1177 { 1178 if (sve_access_check(s)) { 1179 unsigned vsz = vec_full_reg_size(s); 1180 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd), 1181 vec_full_reg_offset(s, a->ra), 1182 vec_full_reg_offset(s, a->rn), 1183 vec_full_reg_offset(s, a->rm), 1184 pred_full_reg_offset(s, a->pg), 1185 vsz, vsz, 0, fn); 1186 } 1187 return true; 1188 } 1189 1190 static gen_helper_gvec_5 * const mla_fns[4] = { 1191 gen_helper_sve_mla_b, gen_helper_sve_mla_h, 1192 gen_helper_sve_mla_s, gen_helper_sve_mla_d, 1193 }; 1194 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) 1195 1196 static gen_helper_gvec_5 * const mls_fns[4] = { 1197 gen_helper_sve_mls_b, gen_helper_sve_mls_h, 1198 gen_helper_sve_mls_s, gen_helper_sve_mls_d, 1199 }; 1200 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) 1201 1202 /* 1203 *** SVE Index Generation Group 1204 */ 1205 1206 static bool do_index(DisasContext *s, int esz, int rd, 1207 TCGv_i64 start, TCGv_i64 incr) 1208 { 1209 unsigned vsz; 1210 TCGv_i32 desc; 1211 TCGv_ptr t_zd; 1212 1213 if (!sve_access_check(s)) { 1214 return true; 1215 } 1216 1217 vsz = vec_full_reg_size(s); 1218 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1219 t_zd = tcg_temp_new_ptr(); 1220 1221 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 1222 if (esz == 3) { 1223 gen_helper_sve_index_d(t_zd, start, incr, desc); 1224 } else { 1225 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32); 1226 static index_fn * const fns[3] = { 1227 gen_helper_sve_index_b, 1228 gen_helper_sve_index_h, 1229 gen_helper_sve_index_s, 1230 }; 1231 TCGv_i32 s32 = tcg_temp_new_i32(); 1232 TCGv_i32 i32 = tcg_temp_new_i32(); 1233 1234 tcg_gen_extrl_i64_i32(s32, start); 1235 tcg_gen_extrl_i64_i32(i32, 
incr); 1236 fns[esz](t_zd, s32, i32, desc); 1237 } 1238 return true; 1239 } 1240 1241 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, 1242 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) 1243 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, 1244 tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) 1245 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, 1246 cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) 1247 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, 1248 cpu_reg(s, a->rn), cpu_reg(s, a->rm)) 1249 1250 /* 1251 *** SVE Stack Allocation Group 1252 */ 1253 1254 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) 1255 { 1256 if (!dc_isar_feature(aa64_sve, s)) { 1257 return false; 1258 } 1259 if (sve_access_check(s)) { 1260 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1261 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1262 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); 1263 } 1264 return true; 1265 } 1266 1267 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) 1268 { 1269 if (!dc_isar_feature(aa64_sme, s)) { 1270 return false; 1271 } 1272 if (sme_enabled_check(s)) { 1273 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1274 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1275 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); 1276 } 1277 return true; 1278 } 1279 1280 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) 1281 { 1282 if (!dc_isar_feature(aa64_sve, s)) { 1283 return false; 1284 } 1285 if (sve_access_check(s)) { 1286 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1287 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1288 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); 1289 } 1290 return true; 1291 } 1292 1293 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) 1294 { 1295 if (!dc_isar_feature(aa64_sme, s)) { 1296 return false; 1297 } 1298 if (sme_enabled_check(s)) { 1299 TCGv_i64 rd = cpu_reg_sp(s, a->rd); 1300 TCGv_i64 rn = cpu_reg_sp(s, a->rn); 1301 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); 1302 } 1303 return true; 1304 } 1305 1306 static bool trans_RDVL(DisasContext *s, arg_RDVL *a) 1307 { 1308 if (!dc_isar_feature(aa64_sve, s)) { 1309 return false; 1310 } 1311 if (sve_access_check(s)) { 1312 TCGv_i64 reg = cpu_reg(s, a->rd); 1313 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); 1314 } 1315 return true; 1316 } 1317 1318 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) 1319 { 1320 if (!dc_isar_feature(aa64_sme, s)) { 1321 return false; 1322 } 1323 if (sme_enabled_check(s)) { 1324 TCGv_i64 reg = cpu_reg(s, a->rd); 1325 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); 1326 } 1327 return true; 1328 } 1329 1330 /* 1331 *** SVE Compute Vector Address Group 1332 */ 1333 1334 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) 1335 { 1336 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); 1337 } 1338 1339 TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) 1340 TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) 1341 TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) 1342 TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) 1343 1344 /* 1345 *** SVE Integer Misc - Unpredicated Group 1346 */ 1347 1348 static gen_helper_gvec_2 * const fexpa_fns[4] = { 1349 NULL, gen_helper_sve_fexpa_h, 1350 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, 1351 }; 1352 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, 1353 fexpa_fns[a->esz], a->rd, a->rn, 0) 1354 1355 static gen_helper_gvec_3 * const ftssel_fns[4] = { 
1356 NULL, gen_helper_sve_ftssel_h, 1357 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, 1358 }; 1359 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, 1360 ftssel_fns[a->esz], a, 0) 1361 1362 /* 1363 *** SVE Predicate Logical Operations Group 1364 */ 1365 1366 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a, 1367 const GVecGen4 *gvec_op) 1368 { 1369 if (!sve_access_check(s)) { 1370 return true; 1371 } 1372 1373 unsigned psz = pred_gvec_reg_size(s); 1374 int dofs = pred_full_reg_offset(s, a->rd); 1375 int nofs = pred_full_reg_offset(s, a->rn); 1376 int mofs = pred_full_reg_offset(s, a->rm); 1377 int gofs = pred_full_reg_offset(s, a->pg); 1378 1379 if (!a->s) { 1380 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1381 return true; 1382 } 1383 1384 if (psz == 8) { 1385 /* Do the operation and the flags generation in temps. */ 1386 TCGv_i64 pd = tcg_temp_new_i64(); 1387 TCGv_i64 pn = tcg_temp_new_i64(); 1388 TCGv_i64 pm = tcg_temp_new_i64(); 1389 TCGv_i64 pg = tcg_temp_new_i64(); 1390 1391 tcg_gen_ld_i64(pn, cpu_env, nofs); 1392 tcg_gen_ld_i64(pm, cpu_env, mofs); 1393 tcg_gen_ld_i64(pg, cpu_env, gofs); 1394 1395 gvec_op->fni8(pd, pn, pm, pg); 1396 tcg_gen_st_i64(pd, cpu_env, dofs); 1397 1398 do_predtest1(pd, pg); 1399 } else { 1400 /* The operation and flags generation is large. The computation 1401 * of the flags depends on the original contents of the guarding 1402 * predicate. If the destination overwrites the guarding predicate, 1403 * then the easiest way to get this right is to save a copy. 1404 */ 1405 int tofs = gofs; 1406 if (a->rd == a->pg) { 1407 tofs = offsetof(CPUARMState, vfp.preg_tmp); 1408 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz); 1409 } 1410 1411 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op); 1412 do_predtest(s, dofs, tofs, psz / 8); 1413 } 1414 return true; 1415 } 1416 1417 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1418 { 1419 tcg_gen_and_i64(pd, pn, pm); 1420 tcg_gen_and_i64(pd, pd, pg); 1421 } 1422 1423 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1424 TCGv_vec pm, TCGv_vec pg) 1425 { 1426 tcg_gen_and_vec(vece, pd, pn, pm); 1427 tcg_gen_and_vec(vece, pd, pd, pg); 1428 } 1429 1430 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) 1431 { 1432 static const GVecGen4 op = { 1433 .fni8 = gen_and_pg_i64, 1434 .fniv = gen_and_pg_vec, 1435 .fno = gen_helper_sve_and_pppp, 1436 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1437 }; 1438 1439 if (!dc_isar_feature(aa64_sve, s)) { 1440 return false; 1441 } 1442 if (!a->s) { 1443 if (a->rn == a->rm) { 1444 if (a->pg == a->rn) { 1445 return do_mov_p(s, a->rd, a->rn); 1446 } 1447 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); 1448 } else if (a->pg == a->rn || a->pg == a->rm) { 1449 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); 1450 } 1451 } 1452 return do_pppp_flags(s, a, &op); 1453 } 1454 1455 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1456 { 1457 tcg_gen_andc_i64(pd, pn, pm); 1458 tcg_gen_and_i64(pd, pd, pg); 1459 } 1460 1461 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1462 TCGv_vec pm, TCGv_vec pg) 1463 { 1464 tcg_gen_andc_vec(vece, pd, pn, pm); 1465 tcg_gen_and_vec(vece, pd, pd, pg); 1466 } 1467 1468 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) 1469 { 1470 static const GVecGen4 op = { 1471 .fni8 = gen_bic_pg_i64, 1472 .fniv = gen_bic_pg_vec, 1473 .fno = gen_helper_sve_bic_pppp, 1474 .prefer_i64 = 
TCG_TARGET_REG_BITS == 64, 1475 }; 1476 1477 if (!dc_isar_feature(aa64_sve, s)) { 1478 return false; 1479 } 1480 if (!a->s && a->pg == a->rn) { 1481 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); 1482 } 1483 return do_pppp_flags(s, a, &op); 1484 } 1485 1486 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1487 { 1488 tcg_gen_xor_i64(pd, pn, pm); 1489 tcg_gen_and_i64(pd, pd, pg); 1490 } 1491 1492 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1493 TCGv_vec pm, TCGv_vec pg) 1494 { 1495 tcg_gen_xor_vec(vece, pd, pn, pm); 1496 tcg_gen_and_vec(vece, pd, pd, pg); 1497 } 1498 1499 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) 1500 { 1501 static const GVecGen4 op = { 1502 .fni8 = gen_eor_pg_i64, 1503 .fniv = gen_eor_pg_vec, 1504 .fno = gen_helper_sve_eor_pppp, 1505 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1506 }; 1507 1508 if (!dc_isar_feature(aa64_sve, s)) { 1509 return false; 1510 } 1511 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ 1512 if (!a->s && a->pg == a->rm) { 1513 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); 1514 } 1515 return do_pppp_flags(s, a, &op); 1516 } 1517 1518 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) 1519 { 1520 if (a->s || !dc_isar_feature(aa64_sve, s)) { 1521 return false; 1522 } 1523 if (sve_access_check(s)) { 1524 unsigned psz = pred_gvec_reg_size(s); 1525 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd), 1526 pred_full_reg_offset(s, a->pg), 1527 pred_full_reg_offset(s, a->rn), 1528 pred_full_reg_offset(s, a->rm), psz, psz); 1529 } 1530 return true; 1531 } 1532 1533 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1534 { 1535 tcg_gen_or_i64(pd, pn, pm); 1536 tcg_gen_and_i64(pd, pd, pg); 1537 } 1538 1539 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1540 TCGv_vec pm, TCGv_vec pg) 1541 { 1542 tcg_gen_or_vec(vece, pd, pn, pm); 1543 tcg_gen_and_vec(vece, pd, pd, pg); 1544 } 1545 1546 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) 1547 { 1548 static const GVecGen4 op = { 1549 .fni8 = gen_orr_pg_i64, 1550 .fniv = gen_orr_pg_vec, 1551 .fno = gen_helper_sve_orr_pppp, 1552 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1553 }; 1554 1555 if (!dc_isar_feature(aa64_sve, s)) { 1556 return false; 1557 } 1558 if (!a->s && a->pg == a->rn && a->rn == a->rm) { 1559 return do_mov_p(s, a->rd, a->rn); 1560 } 1561 return do_pppp_flags(s, a, &op); 1562 } 1563 1564 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1565 { 1566 tcg_gen_orc_i64(pd, pn, pm); 1567 tcg_gen_and_i64(pd, pd, pg); 1568 } 1569 1570 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1571 TCGv_vec pm, TCGv_vec pg) 1572 { 1573 tcg_gen_orc_vec(vece, pd, pn, pm); 1574 tcg_gen_and_vec(vece, pd, pd, pg); 1575 } 1576 1577 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) 1578 { 1579 static const GVecGen4 op = { 1580 .fni8 = gen_orn_pg_i64, 1581 .fniv = gen_orn_pg_vec, 1582 .fno = gen_helper_sve_orn_pppp, 1583 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1584 }; 1585 1586 if (!dc_isar_feature(aa64_sve, s)) { 1587 return false; 1588 } 1589 return do_pppp_flags(s, a, &op); 1590 } 1591 1592 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1593 { 1594 tcg_gen_or_i64(pd, pn, pm); 1595 tcg_gen_andc_i64(pd, pg, pd); 1596 } 1597 1598 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1599 TCGv_vec pm, TCGv_vec pg) 1600 { 1601 tcg_gen_or_vec(vece, 
pd, pn, pm); 1602 tcg_gen_andc_vec(vece, pd, pg, pd); 1603 } 1604 1605 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) 1606 { 1607 static const GVecGen4 op = { 1608 .fni8 = gen_nor_pg_i64, 1609 .fniv = gen_nor_pg_vec, 1610 .fno = gen_helper_sve_nor_pppp, 1611 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1612 }; 1613 1614 if (!dc_isar_feature(aa64_sve, s)) { 1615 return false; 1616 } 1617 return do_pppp_flags(s, a, &op); 1618 } 1619 1620 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg) 1621 { 1622 tcg_gen_and_i64(pd, pn, pm); 1623 tcg_gen_andc_i64(pd, pg, pd); 1624 } 1625 1626 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn, 1627 TCGv_vec pm, TCGv_vec pg) 1628 { 1629 tcg_gen_and_vec(vece, pd, pn, pm); 1630 tcg_gen_andc_vec(vece, pd, pg, pd); 1631 } 1632 1633 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) 1634 { 1635 static const GVecGen4 op = { 1636 .fni8 = gen_nand_pg_i64, 1637 .fniv = gen_nand_pg_vec, 1638 .fno = gen_helper_sve_nand_pppp, 1639 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 1640 }; 1641 1642 if (!dc_isar_feature(aa64_sve, s)) { 1643 return false; 1644 } 1645 return do_pppp_flags(s, a, &op); 1646 } 1647 1648 /* 1649 *** SVE Predicate Misc Group 1650 */ 1651 1652 static bool trans_PTEST(DisasContext *s, arg_PTEST *a) 1653 { 1654 if (!dc_isar_feature(aa64_sve, s)) { 1655 return false; 1656 } 1657 if (sve_access_check(s)) { 1658 int nofs = pred_full_reg_offset(s, a->rn); 1659 int gofs = pred_full_reg_offset(s, a->pg); 1660 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8); 1661 1662 if (words == 1) { 1663 TCGv_i64 pn = tcg_temp_new_i64(); 1664 TCGv_i64 pg = tcg_temp_new_i64(); 1665 1666 tcg_gen_ld_i64(pn, cpu_env, nofs); 1667 tcg_gen_ld_i64(pg, cpu_env, gofs); 1668 do_predtest1(pn, pg); 1669 } else { 1670 do_predtest(s, nofs, gofs, words); 1671 } 1672 } 1673 return true; 1674 } 1675 1676 /* See the ARM pseudocode DecodePredCount. */ 1677 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz) 1678 { 1679 unsigned elements = fullsz >> esz; 1680 unsigned bound; 1681 1682 switch (pattern) { 1683 case 0x0: /* POW2 */ 1684 return pow2floor(elements); 1685 case 0x1: /* VL1 */ 1686 case 0x2: /* VL2 */ 1687 case 0x3: /* VL3 */ 1688 case 0x4: /* VL4 */ 1689 case 0x5: /* VL5 */ 1690 case 0x6: /* VL6 */ 1691 case 0x7: /* VL7 */ 1692 case 0x8: /* VL8 */ 1693 bound = pattern; 1694 break; 1695 case 0x9: /* VL16 */ 1696 case 0xa: /* VL32 */ 1697 case 0xb: /* VL64 */ 1698 case 0xc: /* VL128 */ 1699 case 0xd: /* VL256 */ 1700 bound = 16 << (pattern - 9); 1701 break; 1702 case 0x1d: /* MUL4 */ 1703 return elements - elements % 4; 1704 case 0x1e: /* MUL3 */ 1705 return elements - elements % 3; 1706 case 0x1f: /* ALL */ 1707 return elements; 1708 default: /* #uimm5 */ 1709 return 0; 1710 } 1711 return elements >= bound ? bound : 0; 1712 } 1713 1714 /* This handles all of the predicate initialization instructions, 1715 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32 1716 * so that decode_pred_count returns 0. For SETFFR, we will have 1717 * set RD == 16 == FFR. 
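 * As a worked example of decode_pred_count above: with a 256-bit
 * vector and .S elements there are 8 elements, so POW2 yields 8,
 * VL3 yields 3, VL16 yields 0 (8 < 16), MUL3 yields 6 and ALL yields 8.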
1718 */ 1719 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) 1720 { 1721 if (!sve_access_check(s)) { 1722 return true; 1723 } 1724 1725 unsigned fullsz = vec_full_reg_size(s); 1726 unsigned ofs = pred_full_reg_offset(s, rd); 1727 unsigned numelem, setsz, i; 1728 uint64_t word, lastword; 1729 TCGv_i64 t; 1730 1731 numelem = decode_pred_count(fullsz, pat, esz); 1732 1733 /* Determine what we must store into each bit, and how many. */ 1734 if (numelem == 0) { 1735 lastword = word = 0; 1736 setsz = fullsz; 1737 } else { 1738 setsz = numelem << esz; 1739 lastword = word = pred_esz_masks[esz]; 1740 if (setsz % 64) { 1741 lastword &= MAKE_64BIT_MASK(0, setsz % 64); 1742 } 1743 } 1744 1745 t = tcg_temp_new_i64(); 1746 if (fullsz <= 64) { 1747 tcg_gen_movi_i64(t, lastword); 1748 tcg_gen_st_i64(t, cpu_env, ofs); 1749 goto done; 1750 } 1751 1752 if (word == lastword) { 1753 unsigned maxsz = size_for_gvec(fullsz / 8); 1754 unsigned oprsz = size_for_gvec(setsz / 8); 1755 1756 if (oprsz * 8 == setsz) { 1757 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); 1758 goto done; 1759 } 1760 } 1761 1762 setsz /= 8; 1763 fullsz /= 8; 1764 1765 tcg_gen_movi_i64(t, word); 1766 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { 1767 tcg_gen_st_i64(t, cpu_env, ofs + i); 1768 } 1769 if (lastword != word) { 1770 tcg_gen_movi_i64(t, lastword); 1771 tcg_gen_st_i64(t, cpu_env, ofs + i); 1772 i += 8; 1773 } 1774 if (i < fullsz) { 1775 tcg_gen_movi_i64(t, 0); 1776 for (; i < fullsz; i += 8) { 1777 tcg_gen_st_i64(t, cpu_env, ofs + i); 1778 } 1779 } 1780 1781 done: 1782 /* PTRUES */ 1783 if (setflag) { 1784 tcg_gen_movi_i32(cpu_NF, -(word != 0)); 1785 tcg_gen_movi_i32(cpu_CF, word == 0); 1786 tcg_gen_movi_i32(cpu_VF, 0); 1787 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1788 } 1789 return true; 1790 } 1791 1792 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) 1793 1794 /* Note pat == 31 is #all, to set all elements. */ 1795 TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, 1796 do_predset, 0, FFR_PRED_NUM, 31, false) 1797 1798 /* Note pat == 32 is #unimp, to set no elements. */ 1799 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) 1800 1801 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) 1802 { 1803 /* The path through do_pppp_flags is complicated enough to want to avoid 1804 * duplication. Frob the arguments into the form of a predicated AND. 
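     * That is, RDFFR(S) Pd.B, Pg/Z becomes AND(S) Pd.B, Pg/Z, FFR.B,
     * FFR.B, with FFR_PRED_NUM supplying both Pn and Pm.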
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));

    do_pred_flags(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}

/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
    }
}

/* Similarly with a vector and a scalar operand.
*/ 1921 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, 1922 TCGv_i64 val, bool u, bool d) 1923 { 1924 unsigned vsz = vec_full_reg_size(s); 1925 TCGv_ptr dptr, nptr; 1926 TCGv_i32 t32, desc; 1927 TCGv_i64 t64; 1928 1929 dptr = tcg_temp_new_ptr(); 1930 nptr = tcg_temp_new_ptr(); 1931 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd)); 1932 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn)); 1933 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 1934 1935 switch (esz) { 1936 case MO_8: 1937 t32 = tcg_temp_new_i32(); 1938 tcg_gen_extrl_i64_i32(t32, val); 1939 if (d) { 1940 tcg_gen_neg_i32(t32, t32); 1941 } 1942 if (u) { 1943 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc); 1944 } else { 1945 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); 1946 } 1947 break; 1948 1949 case MO_16: 1950 t32 = tcg_temp_new_i32(); 1951 tcg_gen_extrl_i64_i32(t32, val); 1952 if (d) { 1953 tcg_gen_neg_i32(t32, t32); 1954 } 1955 if (u) { 1956 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc); 1957 } else { 1958 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); 1959 } 1960 break; 1961 1962 case MO_32: 1963 t64 = tcg_temp_new_i64(); 1964 if (d) { 1965 tcg_gen_neg_i64(t64, val); 1966 } else { 1967 tcg_gen_mov_i64(t64, val); 1968 } 1969 if (u) { 1970 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc); 1971 } else { 1972 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); 1973 } 1974 break; 1975 1976 case MO_64: 1977 if (u) { 1978 if (d) { 1979 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc); 1980 } else { 1981 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc); 1982 } 1983 } else if (d) { 1984 t64 = tcg_temp_new_i64(); 1985 tcg_gen_neg_i64(t64, val); 1986 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); 1987 } else { 1988 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); 1989 } 1990 break; 1991 1992 default: 1993 g_assert_not_reached(); 1994 } 1995 } 1996 1997 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) 1998 { 1999 if (!dc_isar_feature(aa64_sve, s)) { 2000 return false; 2001 } 2002 if (sve_access_check(s)) { 2003 unsigned fullsz = vec_full_reg_size(s); 2004 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2005 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm); 2006 } 2007 return true; 2008 } 2009 2010 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) 2011 { 2012 if (!dc_isar_feature(aa64_sve, s)) { 2013 return false; 2014 } 2015 if (sve_access_check(s)) { 2016 unsigned fullsz = vec_full_reg_size(s); 2017 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2018 int inc = numelem * a->imm * (a->d ? -1 : 1); 2019 TCGv_i64 reg = cpu_reg(s, a->rd); 2020 2021 tcg_gen_addi_i64(reg, reg, inc); 2022 } 2023 return true; 2024 } 2025 2026 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2027 { 2028 if (!dc_isar_feature(aa64_sve, s)) { 2029 return false; 2030 } 2031 if (!sve_access_check(s)) { 2032 return true; 2033 } 2034 2035 unsigned fullsz = vec_full_reg_size(s); 2036 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2037 int inc = numelem * a->imm; 2038 TCGv_i64 reg = cpu_reg(s, a->rd); 2039 2040 /* Use normal 64-bit arithmetic to detect 32-bit overflow. 
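 * When the decoded element count is zero there is nothing to add or
 * subtract, so the register value is simply zero- or sign-extended below.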
*/ 2041 if (inc == 0) { 2042 if (a->u) { 2043 tcg_gen_ext32u_i64(reg, reg); 2044 } else { 2045 tcg_gen_ext32s_i64(reg, reg); 2046 } 2047 } else { 2048 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); 2049 } 2050 return true; 2051 } 2052 2053 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2054 { 2055 if (!dc_isar_feature(aa64_sve, s)) { 2056 return false; 2057 } 2058 if (!sve_access_check(s)) { 2059 return true; 2060 } 2061 2062 unsigned fullsz = vec_full_reg_size(s); 2063 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2064 int inc = numelem * a->imm; 2065 TCGv_i64 reg = cpu_reg(s, a->rd); 2066 2067 if (inc != 0) { 2068 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); 2069 } 2070 return true; 2071 } 2072 2073 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2074 { 2075 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2076 return false; 2077 } 2078 2079 unsigned fullsz = vec_full_reg_size(s); 2080 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2081 int inc = numelem * a->imm; 2082 2083 if (inc != 0) { 2084 if (sve_access_check(s)) { 2085 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2086 vec_full_reg_offset(s, a->rn), 2087 tcg_constant_i64(a->d ? -inc : inc), 2088 fullsz, fullsz); 2089 } 2090 } else { 2091 do_mov_z(s, a->rd, a->rn); 2092 } 2093 return true; 2094 } 2095 2096 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2097 { 2098 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2099 return false; 2100 } 2101 2102 unsigned fullsz = vec_full_reg_size(s); 2103 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2104 int inc = numelem * a->imm; 2105 2106 if (inc != 0) { 2107 if (sve_access_check(s)) { 2108 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 2109 tcg_constant_i64(inc), a->u, a->d); 2110 } 2111 } else { 2112 do_mov_z(s, a->rd, a->rn); 2113 } 2114 return true; 2115 } 2116 2117 /* 2118 *** SVE Bitwise Immediate Group 2119 */ 2120 2121 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2122 { 2123 uint64_t imm; 2124 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2125 extract32(a->dbm, 0, 6), 2126 extract32(a->dbm, 6, 6))) { 2127 return false; 2128 } 2129 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); 2130 } 2131 2132 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) 2133 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) 2134 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) 2135 2136 static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2137 { 2138 uint64_t imm; 2139 2140 if (!dc_isar_feature(aa64_sve, s)) { 2141 return false; 2142 } 2143 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2144 extract32(a->dbm, 0, 6), 2145 extract32(a->dbm, 6, 6))) { 2146 return false; 2147 } 2148 if (sve_access_check(s)) { 2149 do_dupi_z(s, a->rd, imm); 2150 } 2151 return true; 2152 } 2153 2154 /* 2155 *** SVE Integer Wide Immediate - Predicated Group 2156 */ 2157 2158 /* Implement all merging copies. This is used for CPY (immediate), 2159 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 
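 * VAL supplies the value written to the active elements; elements whose
 * predicate bit is false are taken unchanged from ZN.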
2160 */ 2161 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2162 TCGv_i64 val) 2163 { 2164 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2165 static gen_cpy * const fns[4] = { 2166 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2167 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2168 }; 2169 unsigned vsz = vec_full_reg_size(s); 2170 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2171 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2172 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2173 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2174 2175 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 2176 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn)); 2177 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 2178 2179 fns[esz](t_zd, t_zn, t_pg, val, desc); 2180 } 2181 2182 static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2183 { 2184 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2185 return false; 2186 } 2187 if (sve_access_check(s)) { 2188 /* Decode the VFP immediate. */ 2189 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2190 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); 2191 } 2192 return true; 2193 } 2194 2195 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2196 { 2197 if (!dc_isar_feature(aa64_sve, s)) { 2198 return false; 2199 } 2200 if (sve_access_check(s)) { 2201 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); 2202 } 2203 return true; 2204 } 2205 2206 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2207 { 2208 static gen_helper_gvec_2i * const fns[4] = { 2209 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2210 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2211 }; 2212 2213 if (!dc_isar_feature(aa64_sve, s)) { 2214 return false; 2215 } 2216 if (sve_access_check(s)) { 2217 unsigned vsz = vec_full_reg_size(s); 2218 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2219 pred_full_reg_offset(s, a->pg), 2220 tcg_constant_i64(a->imm), 2221 vsz, vsz, 0, fns[a->esz]); 2222 } 2223 return true; 2224 } 2225 2226 /* 2227 *** SVE Permute Extract Group 2228 */ 2229 2230 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2231 { 2232 if (!sve_access_check(s)) { 2233 return true; 2234 } 2235 2236 unsigned vsz = vec_full_reg_size(s); 2237 unsigned n_ofs = imm >= vsz ? 0 : imm; 2238 unsigned n_siz = vsz - n_ofs; 2239 unsigned d = vec_full_reg_offset(s, rd); 2240 unsigned n = vec_full_reg_offset(s, rn); 2241 unsigned m = vec_full_reg_offset(s, rm); 2242 2243 /* Use host vector move insns if we have appropriate sizes 2244 * and no unfortunate overlap. 
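 * Otherwise fall back to the out-of-line helper, which handles any byte
 * offset and fully overlapping operands.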
2245 */ 2246 if (m != d 2247 && n_ofs == size_for_gvec(n_ofs) 2248 && n_siz == size_for_gvec(n_siz) 2249 && (d != n || n_siz <= n_ofs)) { 2250 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2251 if (n_ofs != 0) { 2252 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2253 } 2254 } else { 2255 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2256 } 2257 return true; 2258 } 2259 2260 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) 2261 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) 2262 2263 /* 2264 *** SVE Permute - Unpredicated Group 2265 */ 2266 2267 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2268 { 2269 if (!dc_isar_feature(aa64_sve, s)) { 2270 return false; 2271 } 2272 if (sve_access_check(s)) { 2273 unsigned vsz = vec_full_reg_size(s); 2274 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2275 vsz, vsz, cpu_reg_sp(s, a->rn)); 2276 } 2277 return true; 2278 } 2279 2280 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2281 { 2282 if (!dc_isar_feature(aa64_sve, s)) { 2283 return false; 2284 } 2285 if ((a->imm & 0x1f) == 0) { 2286 return false; 2287 } 2288 if (sve_access_check(s)) { 2289 unsigned vsz = vec_full_reg_size(s); 2290 unsigned dofs = vec_full_reg_offset(s, a->rd); 2291 unsigned esz, index; 2292 2293 esz = ctz32(a->imm); 2294 index = a->imm >> (esz + 1); 2295 2296 if ((index << esz) < vsz) { 2297 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2298 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2299 } else { 2300 /* 2301 * While dup_mem handles 128-bit elements, dup_imm does not. 2302 * Thankfully element size doesn't matter for splatting zero. 2303 */ 2304 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2305 } 2306 } 2307 return true; 2308 } 2309 2310 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2311 { 2312 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2313 static gen_insr * const fns[4] = { 2314 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2315 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2316 }; 2317 unsigned vsz = vec_full_reg_size(s); 2318 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 2319 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2320 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2321 2322 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd)); 2323 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2324 2325 fns[a->esz](t_zd, t_zn, val, desc); 2326 } 2327 2328 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2329 { 2330 if (!dc_isar_feature(aa64_sve, s)) { 2331 return false; 2332 } 2333 if (sve_access_check(s)) { 2334 TCGv_i64 t = tcg_temp_new_i64(); 2335 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2336 do_insr_i64(s, a, t); 2337 } 2338 return true; 2339 } 2340 2341 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2342 { 2343 if (!dc_isar_feature(aa64_sve, s)) { 2344 return false; 2345 } 2346 if (sve_access_check(s)) { 2347 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2348 } 2349 return true; 2350 } 2351 2352 static gen_helper_gvec_2 * const rev_fns[4] = { 2353 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2354 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2355 }; 2356 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) 2357 2358 static gen_helper_gvec_3 * const sve_tbl_fns[4] = { 2359 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2360 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2361 }; 2362 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 
0) 2363 2364 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { 2365 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2366 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2367 }; 2368 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], 2369 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) 2370 2371 static gen_helper_gvec_3 * const tbx_fns[4] = { 2372 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2373 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2374 }; 2375 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) 2376 2377 static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2378 { 2379 static gen_helper_gvec_2 * const fns[4][2] = { 2380 { NULL, NULL }, 2381 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2382 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2383 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2384 }; 2385 2386 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 2387 return false; 2388 } 2389 if (sve_access_check(s)) { 2390 unsigned vsz = vec_full_reg_size(s); 2391 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2392 vec_full_reg_offset(s, a->rn) 2393 + (a->h ? vsz / 2 : 0), 2394 vsz, vsz, 0, fns[a->esz][a->u]); 2395 } 2396 return true; 2397 } 2398 2399 /* 2400 *** SVE Permute - Predicates Group 2401 */ 2402 2403 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2404 gen_helper_gvec_3 *fn) 2405 { 2406 if (!sve_access_check(s)) { 2407 return true; 2408 } 2409 2410 unsigned vsz = pred_full_reg_size(s); 2411 2412 TCGv_ptr t_d = tcg_temp_new_ptr(); 2413 TCGv_ptr t_n = tcg_temp_new_ptr(); 2414 TCGv_ptr t_m = tcg_temp_new_ptr(); 2415 uint32_t desc = 0; 2416 2417 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2418 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2419 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2420 2421 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2422 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2423 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm)); 2424 2425 fn(t_d, t_n, t_m, tcg_constant_i32(desc)); 2426 return true; 2427 } 2428 2429 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2430 gen_helper_gvec_2 *fn) 2431 { 2432 if (!sve_access_check(s)) { 2433 return true; 2434 } 2435 2436 unsigned vsz = pred_full_reg_size(s); 2437 TCGv_ptr t_d = tcg_temp_new_ptr(); 2438 TCGv_ptr t_n = tcg_temp_new_ptr(); 2439 uint32_t desc = 0; 2440 2441 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2442 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2443 2444 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2445 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2446 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2447 2448 fn(t_d, t_n, tcg_constant_i32(desc)); 2449 return true; 2450 } 2451 2452 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) 2453 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) 2454 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) 2455 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) 2456 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) 2457 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) 2458 2459 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p) 2460 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p) 2461 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) 2462 2463 /* 2464 *** SVE 
Permute - Interleaving Group 2465 */ 2466 2467 static gen_helper_gvec_3 * const zip_fns[4] = { 2468 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2469 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2470 }; 2471 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2472 zip_fns[a->esz], a, 0) 2473 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2474 zip_fns[a->esz], a, vec_full_reg_size(s) / 2) 2475 2476 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2477 gen_helper_sve2_zip_q, a, 0) 2478 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2479 gen_helper_sve2_zip_q, a, 2480 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) 2481 2482 static gen_helper_gvec_3 * const uzp_fns[4] = { 2483 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2484 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2485 }; 2486 2487 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2488 uzp_fns[a->esz], a, 0) 2489 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2490 uzp_fns[a->esz], a, 1 << a->esz) 2491 2492 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2493 gen_helper_sve2_uzp_q, a, 0) 2494 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2495 gen_helper_sve2_uzp_q, a, 16) 2496 2497 static gen_helper_gvec_3 * const trn_fns[4] = { 2498 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2499 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2500 }; 2501 2502 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, 2503 trn_fns[a->esz], a, 0) 2504 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, 2505 trn_fns[a->esz], a, 1 << a->esz) 2506 2507 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2508 gen_helper_sve2_trn_q, a, 0) 2509 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, 2510 gen_helper_sve2_trn_q, a, 16) 2511 2512 /* 2513 *** SVE Permute Vector - Predicated Group 2514 */ 2515 2516 static gen_helper_gvec_3 * const compact_fns[4] = { 2517 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2518 }; 2519 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, 2520 compact_fns[a->esz], a, 0) 2521 2522 /* Call the helper that computes the ARM LastActiveElement pseudocode 2523 * function, scaled by the element size. This includes the not found 2524 * indication; e.g. not found for esz=3 is -8. 2525 */ 2526 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2527 { 2528 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2529 * round up, as we do elsewhere, because we need the exact size. 2530 */ 2531 TCGv_ptr t_p = tcg_temp_new_ptr(); 2532 unsigned desc = 0; 2533 2534 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2535 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2536 2537 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg)); 2538 2539 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc)); 2540 } 2541 2542 /* Increment LAST to the offset of the next element in the vector, 2543 * wrapping around to 0. 2544 */ 2545 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2546 { 2547 unsigned vsz = vec_full_reg_size(s); 2548 2549 tcg_gen_addi_i32(last, last, 1 << esz); 2550 if (is_power_of_2(vsz)) { 2551 tcg_gen_andi_i32(last, last, vsz - 1); 2552 } else { 2553 TCGv_i32 max = tcg_constant_i32(vsz); 2554 TCGv_i32 zero = tcg_constant_i32(0); 2555 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2556 } 2557 } 2558 2559 /* If LAST < 0, set LAST to the offset of the last element in the vector. 
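 * This converts the not-found indication from find_last_active into a wrap
 * to the final element, as used by the 'before' (LASTB) forms.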
*/ 2560 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2561 { 2562 unsigned vsz = vec_full_reg_size(s); 2563 2564 if (is_power_of_2(vsz)) { 2565 tcg_gen_andi_i32(last, last, vsz - 1); 2566 } else { 2567 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz)); 2568 TCGv_i32 zero = tcg_constant_i32(0); 2569 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2570 } 2571 } 2572 2573 /* Load an unsigned element of ESZ from BASE+OFS. */ 2574 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2575 { 2576 TCGv_i64 r = tcg_temp_new_i64(); 2577 2578 switch (esz) { 2579 case 0: 2580 tcg_gen_ld8u_i64(r, base, ofs); 2581 break; 2582 case 1: 2583 tcg_gen_ld16u_i64(r, base, ofs); 2584 break; 2585 case 2: 2586 tcg_gen_ld32u_i64(r, base, ofs); 2587 break; 2588 case 3: 2589 tcg_gen_ld_i64(r, base, ofs); 2590 break; 2591 default: 2592 g_assert_not_reached(); 2593 } 2594 return r; 2595 } 2596 2597 /* Load an unsigned element of ESZ from RM[LAST]. */ 2598 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2599 int rm, int esz) 2600 { 2601 TCGv_ptr p = tcg_temp_new_ptr(); 2602 2603 /* Convert offset into vector into offset into ENV. 2604 * The final adjustment for the vector register base 2605 * is added via constant offset to the load. 2606 */ 2607 #if HOST_BIG_ENDIAN 2608 /* Adjust for element ordering. See vec_reg_offset. */ 2609 if (esz < 3) { 2610 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2611 } 2612 #endif 2613 tcg_gen_ext_i32_ptr(p, last); 2614 tcg_gen_add_ptr(p, p, cpu_env); 2615 2616 return load_esz(p, vec_full_reg_offset(s, rm), esz); 2617 } 2618 2619 /* Compute CLAST for a Zreg. */ 2620 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2621 { 2622 TCGv_i32 last; 2623 TCGLabel *over; 2624 TCGv_i64 ele; 2625 unsigned vsz, esz = a->esz; 2626 2627 if (!sve_access_check(s)) { 2628 return true; 2629 } 2630 2631 last = tcg_temp_new_i32(); 2632 over = gen_new_label(); 2633 2634 find_last_active(s, last, esz, a->pg); 2635 2636 /* There is of course no movcond for a 2048-bit vector, 2637 * so we must branch over the actual store. 2638 */ 2639 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2640 2641 if (!before) { 2642 incr_last_active(s, last, esz); 2643 } 2644 2645 ele = load_last_active(s, last, a->rm, esz); 2646 2647 vsz = vec_full_reg_size(s); 2648 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2649 2650 /* If this insn used MOVPRFX, we may need a second move. */ 2651 if (a->rd != a->rn) { 2652 TCGLabel *done = gen_new_label(); 2653 tcg_gen_br(done); 2654 2655 gen_set_label(over); 2656 do_mov_z(s, a->rd, a->rn); 2657 2658 gen_set_label(done); 2659 } else { 2660 gen_set_label(over); 2661 } 2662 return true; 2663 } 2664 2665 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false) 2666 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true) 2667 2668 /* Compute CLAST for a scalar. */ 2669 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2670 bool before, TCGv_i64 reg_val) 2671 { 2672 TCGv_i32 last = tcg_temp_new_i32(); 2673 TCGv_i64 ele, cmp; 2674 2675 find_last_active(s, last, esz, pg); 2676 2677 /* Extend the original value of last prior to incrementing. 
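 * The pre-increment value is what is compared against zero below;
 * a negative value means no active element was found.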
*/ 2678 cmp = tcg_temp_new_i64(); 2679 tcg_gen_ext_i32_i64(cmp, last); 2680 2681 if (!before) { 2682 incr_last_active(s, last, esz); 2683 } 2684 2685 /* The conceit here is that while last < 0 indicates not found, after 2686 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address 2687 * from which we can load garbage. We then discard the garbage with 2688 * a conditional move. 2689 */ 2690 ele = load_last_active(s, last, rm, esz); 2691 2692 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0), 2693 ele, reg_val); 2694 } 2695 2696 /* Compute CLAST for a Vreg. */ 2697 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2698 { 2699 if (sve_access_check(s)) { 2700 int esz = a->esz; 2701 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2702 TCGv_i64 reg = load_esz(cpu_env, ofs, esz); 2703 2704 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2705 write_fp_dreg(s, a->rd, reg); 2706 } 2707 return true; 2708 } 2709 2710 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false) 2711 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true) 2712 2713 /* Compute CLAST for a Xreg. */ 2714 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 2715 { 2716 TCGv_i64 reg; 2717 2718 if (!sve_access_check(s)) { 2719 return true; 2720 } 2721 2722 reg = cpu_reg(s, a->rd); 2723 switch (a->esz) { 2724 case 0: 2725 tcg_gen_ext8u_i64(reg, reg); 2726 break; 2727 case 1: 2728 tcg_gen_ext16u_i64(reg, reg); 2729 break; 2730 case 2: 2731 tcg_gen_ext32u_i64(reg, reg); 2732 break; 2733 case 3: 2734 break; 2735 default: 2736 g_assert_not_reached(); 2737 } 2738 2739 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 2740 return true; 2741 } 2742 2743 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false) 2744 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true) 2745 2746 /* Compute LAST for a scalar. */ 2747 static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 2748 int pg, int rm, bool before) 2749 { 2750 TCGv_i32 last = tcg_temp_new_i32(); 2751 2752 find_last_active(s, last, esz, pg); 2753 if (before) { 2754 wrap_last_active(s, last, esz); 2755 } else { 2756 incr_last_active(s, last, esz); 2757 } 2758 2759 return load_last_active(s, last, rm, esz); 2760 } 2761 2762 /* Compute LAST for a Vreg. */ 2763 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2764 { 2765 if (sve_access_check(s)) { 2766 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2767 write_fp_dreg(s, a->rd, val); 2768 } 2769 return true; 2770 } 2771 2772 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false) 2773 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true) 2774 2775 /* Compute LAST for a Xreg. 
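 * The element is loaded zero-extended, so narrower element sizes clear
 * the high bits of the Xreg.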
*/ 2776 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 2777 { 2778 if (sve_access_check(s)) { 2779 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 2780 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 2781 } 2782 return true; 2783 } 2784 2785 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false) 2786 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true) 2787 2788 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 2789 { 2790 if (!dc_isar_feature(aa64_sve, s)) { 2791 return false; 2792 } 2793 if (sve_access_check(s)) { 2794 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 2795 } 2796 return true; 2797 } 2798 2799 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 2800 { 2801 if (!dc_isar_feature(aa64_sve, s)) { 2802 return false; 2803 } 2804 if (sve_access_check(s)) { 2805 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 2806 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz); 2807 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 2808 } 2809 return true; 2810 } 2811 2812 static gen_helper_gvec_3 * const revb_fns[4] = { 2813 NULL, gen_helper_sve_revb_h, 2814 gen_helper_sve_revb_s, gen_helper_sve_revb_d, 2815 }; 2816 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0) 2817 2818 static gen_helper_gvec_3 * const revh_fns[4] = { 2819 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d, 2820 }; 2821 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0) 2822 2823 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz, 2824 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0) 2825 2826 TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0) 2827 2828 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz, 2829 gen_helper_sve_splice, a, a->esz) 2830 2831 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice, 2832 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz) 2833 2834 /* 2835 *** SVE Integer Compare - Vectors Group 2836 */ 2837 2838 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 2839 gen_helper_gvec_flags_4 *gen_fn) 2840 { 2841 TCGv_ptr pd, zn, zm, pg; 2842 unsigned vsz; 2843 TCGv_i32 t; 2844 2845 if (gen_fn == NULL) { 2846 return false; 2847 } 2848 if (!sve_access_check(s)) { 2849 return true; 2850 } 2851 2852 vsz = vec_full_reg_size(s); 2853 t = tcg_temp_new_i32(); 2854 pd = tcg_temp_new_ptr(); 2855 zn = tcg_temp_new_ptr(); 2856 zm = tcg_temp_new_ptr(); 2857 pg = tcg_temp_new_ptr(); 2858 2859 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 2860 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2861 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm)); 2862 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 2863 2864 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0))); 2865 2866 do_pred_flags(t); 2867 return true; 2868 } 2869 2870 #define DO_PPZZ(NAME, name) \ 2871 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \ 2872 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 2873 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 2874 }; \ 2875 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \ 2876 a, name##_ppzz_fns[a->esz]) 2877 2878 DO_PPZZ(CMPEQ, cmpeq) 2879 DO_PPZZ(CMPNE, cmpne) 2880 DO_PPZZ(CMPGT, cmpgt) 2881 DO_PPZZ(CMPGE, cmpge) 2882 DO_PPZZ(CMPHI, cmphi) 2883 DO_PPZZ(CMPHS, cmphs) 2884 2885 #undef DO_PPZZ 2886 2887 #define DO_PPZW(NAME, name) \ 2888 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \ 2889 
gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 2890 gen_helper_sve_##name##_ppzw_s, NULL \ 2891 }; \ 2892 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \ 2893 a, name##_ppzw_fns[a->esz]) 2894 2895 DO_PPZW(CMPEQ, cmpeq) 2896 DO_PPZW(CMPNE, cmpne) 2897 DO_PPZW(CMPGT, cmpgt) 2898 DO_PPZW(CMPGE, cmpge) 2899 DO_PPZW(CMPHI, cmphi) 2900 DO_PPZW(CMPHS, cmphs) 2901 DO_PPZW(CMPLT, cmplt) 2902 DO_PPZW(CMPLE, cmple) 2903 DO_PPZW(CMPLO, cmplo) 2904 DO_PPZW(CMPLS, cmpls) 2905 2906 #undef DO_PPZW 2907 2908 /* 2909 *** SVE Integer Compare - Immediate Groups 2910 */ 2911 2912 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 2913 gen_helper_gvec_flags_3 *gen_fn) 2914 { 2915 TCGv_ptr pd, zn, pg; 2916 unsigned vsz; 2917 TCGv_i32 t; 2918 2919 if (gen_fn == NULL) { 2920 return false; 2921 } 2922 if (!sve_access_check(s)) { 2923 return true; 2924 } 2925 2926 vsz = vec_full_reg_size(s); 2927 t = tcg_temp_new_i32(); 2928 pd = tcg_temp_new_ptr(); 2929 zn = tcg_temp_new_ptr(); 2930 pg = tcg_temp_new_ptr(); 2931 2932 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 2933 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2934 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 2935 2936 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm))); 2937 2938 do_pred_flags(t); 2939 return true; 2940 } 2941 2942 #define DO_PPZI(NAME, name) \ 2943 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \ 2944 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 2945 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 2946 }; \ 2947 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \ 2948 name##_ppzi_fns[a->esz]) 2949 2950 DO_PPZI(CMPEQ, cmpeq) 2951 DO_PPZI(CMPNE, cmpne) 2952 DO_PPZI(CMPGT, cmpgt) 2953 DO_PPZI(CMPGE, cmpge) 2954 DO_PPZI(CMPHI, cmphi) 2955 DO_PPZI(CMPHS, cmphs) 2956 DO_PPZI(CMPLT, cmplt) 2957 DO_PPZI(CMPLE, cmple) 2958 DO_PPZI(CMPLO, cmplo) 2959 DO_PPZI(CMPLS, cmpls) 2960 2961 #undef DO_PPZI 2962 2963 /* 2964 *** SVE Partition Break Group 2965 */ 2966 2967 static bool do_brk3(DisasContext *s, arg_rprr_s *a, 2968 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 2969 { 2970 if (!sve_access_check(s)) { 2971 return true; 2972 } 2973 2974 unsigned vsz = pred_full_reg_size(s); 2975 2976 /* Predicate sizes may be smaller and cannot use simd_desc. */ 2977 TCGv_ptr d = tcg_temp_new_ptr(); 2978 TCGv_ptr n = tcg_temp_new_ptr(); 2979 TCGv_ptr m = tcg_temp_new_ptr(); 2980 TCGv_ptr g = tcg_temp_new_ptr(); 2981 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 2982 2983 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 2984 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 2985 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm)); 2986 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 2987 2988 if (a->s) { 2989 TCGv_i32 t = tcg_temp_new_i32(); 2990 fn_s(t, d, n, m, g, desc); 2991 do_pred_flags(t); 2992 } else { 2993 fn(d, n, m, g, desc); 2994 } 2995 return true; 2996 } 2997 2998 static bool do_brk2(DisasContext *s, arg_rpr_s *a, 2999 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 3000 { 3001 if (!sve_access_check(s)) { 3002 return true; 3003 } 3004 3005 unsigned vsz = pred_full_reg_size(s); 3006 3007 /* Predicate sizes may be smaller and cannot use simd_desc. 
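 * As with do_brk3 above, the predicate size is passed via PREDDESC.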
*/ 3008 TCGv_ptr d = tcg_temp_new_ptr(); 3009 TCGv_ptr n = tcg_temp_new_ptr(); 3010 TCGv_ptr g = tcg_temp_new_ptr(); 3011 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3012 3013 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3014 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3015 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3016 3017 if (a->s) { 3018 TCGv_i32 t = tcg_temp_new_i32(); 3019 fn_s(t, d, n, g, desc); 3020 do_pred_flags(t); 3021 } else { 3022 fn(d, n, g, desc); 3023 } 3024 return true; 3025 } 3026 3027 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a, 3028 gen_helper_sve_brkpa, gen_helper_sve_brkpas) 3029 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a, 3030 gen_helper_sve_brkpb, gen_helper_sve_brkpbs) 3031 3032 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a, 3033 gen_helper_sve_brka_m, gen_helper_sve_brkas_m) 3034 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a, 3035 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m) 3036 3037 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a, 3038 gen_helper_sve_brka_z, gen_helper_sve_brkas_z) 3039 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a, 3040 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z) 3041 3042 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a, 3043 gen_helper_sve_brkn, gen_helper_sve_brkns) 3044 3045 /* 3046 *** SVE Predicate Count Group 3047 */ 3048 3049 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3050 { 3051 unsigned psz = pred_full_reg_size(s); 3052 3053 if (psz <= 8) { 3054 uint64_t psz_mask; 3055 3056 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn)); 3057 if (pn != pg) { 3058 TCGv_i64 g = tcg_temp_new_i64(); 3059 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg)); 3060 tcg_gen_and_i64(val, val, g); 3061 } 3062 3063 /* Reduce the pred_esz_masks value simply to reduce the 3064 * size of the code generated here. 3065 */ 3066 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3067 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3068 3069 tcg_gen_ctpop_i64(val, val); 3070 } else { 3071 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3072 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3073 unsigned desc = 0; 3074 3075 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3076 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3077 3078 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); 3079 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3080 3081 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc)); 3082 } 3083 } 3084 3085 static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3086 { 3087 if (!dc_isar_feature(aa64_sve, s)) { 3088 return false; 3089 } 3090 if (sve_access_check(s)) { 3091 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3092 } 3093 return true; 3094 } 3095 3096 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3097 { 3098 if (!dc_isar_feature(aa64_sve, s)) { 3099 return false; 3100 } 3101 if (sve_access_check(s)) { 3102 TCGv_i64 reg = cpu_reg(s, a->rd); 3103 TCGv_i64 val = tcg_temp_new_i64(); 3104 3105 do_cntp(s, val, a->esz, a->pg, a->pg); 3106 if (a->d) { 3107 tcg_gen_sub_i64(reg, reg, val); 3108 } else { 3109 tcg_gen_add_i64(reg, reg, val); 3110 } 3111 } 3112 return true; 3113 } 3114 3115 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3116 { 3117 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3118 return false; 3119 } 3120 if (sve_access_check(s)) { 3121 unsigned vsz = vec_full_reg_size(s); 3122 TCGv_i64 val = tcg_temp_new_i64(); 3123 GVecGen2sFn *gvec_fn = a->d ? 
tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3124 3125 do_cntp(s, val, a->esz, a->pg, a->pg); 3126 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3127 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3128 } 3129 return true; 3130 } 3131 3132 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3133 { 3134 if (!dc_isar_feature(aa64_sve, s)) { 3135 return false; 3136 } 3137 if (sve_access_check(s)) { 3138 TCGv_i64 reg = cpu_reg(s, a->rd); 3139 TCGv_i64 val = tcg_temp_new_i64(); 3140 3141 do_cntp(s, val, a->esz, a->pg, a->pg); 3142 do_sat_addsub_32(reg, val, a->u, a->d); 3143 } 3144 return true; 3145 } 3146 3147 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3148 { 3149 if (!dc_isar_feature(aa64_sve, s)) { 3150 return false; 3151 } 3152 if (sve_access_check(s)) { 3153 TCGv_i64 reg = cpu_reg(s, a->rd); 3154 TCGv_i64 val = tcg_temp_new_i64(); 3155 3156 do_cntp(s, val, a->esz, a->pg, a->pg); 3157 do_sat_addsub_64(reg, val, a->u, a->d); 3158 } 3159 return true; 3160 } 3161 3162 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3163 { 3164 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3165 return false; 3166 } 3167 if (sve_access_check(s)) { 3168 TCGv_i64 val = tcg_temp_new_i64(); 3169 do_cntp(s, val, a->esz, a->pg, a->pg); 3170 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3171 } 3172 return true; 3173 } 3174 3175 /* 3176 *** SVE Integer Compare Scalars Group 3177 */ 3178 3179 static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3180 { 3181 if (!dc_isar_feature(aa64_sve, s)) { 3182 return false; 3183 } 3184 if (!sve_access_check(s)) { 3185 return true; 3186 } 3187 3188 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3189 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3190 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3191 TCGv_i64 cmp = tcg_temp_new_i64(); 3192 3193 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3194 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3195 3196 /* VF = !NF & !CF. */ 3197 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3198 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3199 3200 /* Both NF and VF actually look at bit 31. */ 3201 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3202 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3203 return true; 3204 } 3205 3206 static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3207 { 3208 TCGv_i64 op0, op1, t0, t1, tmax; 3209 TCGv_i32 t2; 3210 TCGv_ptr ptr; 3211 unsigned vsz = vec_full_reg_size(s); 3212 unsigned desc = 0; 3213 TCGCond cond; 3214 uint64_t maxval; 3215 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3216 bool eq = a->eq == a->lt; 3217 3218 /* The greater-than conditions are all SVE2. */ 3219 if (a->lt 3220 ? !dc_isar_feature(aa64_sve, s) 3221 : !dc_isar_feature(aa64_sve2, s)) { 3222 return false; 3223 } 3224 if (!sve_access_check(s)) { 3225 return true; 3226 } 3227 3228 op0 = read_cpu_reg(s, a->rn, 1); 3229 op1 = read_cpu_reg(s, a->rm, 1); 3230 3231 if (!a->sf) { 3232 if (a->u) { 3233 tcg_gen_ext32u_i64(op0, op0); 3234 tcg_gen_ext32u_i64(op1, op1); 3235 } else { 3236 tcg_gen_ext32s_i64(op0, op0); 3237 tcg_gen_ext32s_i64(op1, op1); 3238 } 3239 } 3240 3241 /* For the helper, compress the different conditions into a computation 3242 * of how many iterations for which the condition is true. 3243 */ 3244 t0 = tcg_temp_new_i64(); 3245 t1 = tcg_temp_new_i64(); 3246 3247 if (a->lt) { 3248 tcg_gen_sub_i64(t0, op1, op0); 3249 if (a->u) { 3250 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3251 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3252 } else { 3253 maxval = a->sf ? 
INT64_MAX : INT32_MAX; 3254 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3255 } 3256 } else { 3257 tcg_gen_sub_i64(t0, op0, op1); 3258 if (a->u) { 3259 maxval = 0; 3260 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3261 } else { 3262 maxval = a->sf ? INT64_MIN : INT32_MIN; 3263 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3264 } 3265 } 3266 3267 tmax = tcg_constant_i64(vsz >> a->esz); 3268 if (eq) { 3269 /* Equality means one more iteration. */ 3270 tcg_gen_addi_i64(t0, t0, 1); 3271 3272 /* 3273 * For the less-than while, if op1 is maxval (and the only time 3274 * the addition above could overflow), then we produce an all-true 3275 * predicate by setting the count to the vector length. This is 3276 * because the pseudocode is described as an increment + compare 3277 * loop, and the maximum integer would always compare true. 3278 * Similarly, the greater-than while has the same issue with the 3279 * minimum integer due to the decrement + compare loop. 3280 */ 3281 tcg_gen_movi_i64(t1, maxval); 3282 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3283 } 3284 3285 /* Bound to the maximum. */ 3286 tcg_gen_umin_i64(t0, t0, tmax); 3287 3288 /* Set the count to zero if the condition is false. */ 3289 tcg_gen_movi_i64(t1, 0); 3290 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3291 3292 /* Since we're bounded, pass as a 32-bit type. */ 3293 t2 = tcg_temp_new_i32(); 3294 tcg_gen_extrl_i64_i32(t2, t0); 3295 3296 /* Scale elements to bits. */ 3297 tcg_gen_shli_i32(t2, t2, a->esz); 3298 3299 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3300 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3301 3302 ptr = tcg_temp_new_ptr(); 3303 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3304 3305 if (a->lt) { 3306 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3307 } else { 3308 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc)); 3309 } 3310 do_pred_flags(t2); 3311 return true; 3312 } 3313 3314 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3315 { 3316 TCGv_i64 op0, op1, diff, t1, tmax; 3317 TCGv_i32 t2; 3318 TCGv_ptr ptr; 3319 unsigned vsz = vec_full_reg_size(s); 3320 unsigned desc = 0; 3321 3322 if (!dc_isar_feature(aa64_sve2, s)) { 3323 return false; 3324 } 3325 if (!sve_access_check(s)) { 3326 return true; 3327 } 3328 3329 op0 = read_cpu_reg(s, a->rn, 1); 3330 op1 = read_cpu_reg(s, a->rm, 1); 3331 3332 tmax = tcg_constant_i64(vsz); 3333 diff = tcg_temp_new_i64(); 3334 3335 if (a->rw) { 3336 /* WHILERW */ 3337 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3338 t1 = tcg_temp_new_i64(); 3339 tcg_gen_sub_i64(diff, op0, op1); 3340 tcg_gen_sub_i64(t1, op1, op0); 3341 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3342 /* Round down to a multiple of ESIZE. */ 3343 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3344 /* If op1 == op0, diff == 0, and the condition is always true. */ 3345 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3346 } else { 3347 /* WHILEWR */ 3348 tcg_gen_sub_i64(diff, op1, op0); 3349 /* Round down to a multiple of ESIZE. */ 3350 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3351 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3352 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3353 } 3354 3355 /* Bound to the maximum. */ 3356 tcg_gen_umin_i64(diff, diff, tmax); 3357 3358 /* Since we're bounded, pass as a 32-bit type. 
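 * The byte count in DIFF never exceeds the vector length, which fits
 * easily within 32 bits.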
*/ 3359 t2 = tcg_temp_new_i32(); 3360 tcg_gen_extrl_i64_i32(t2, diff); 3361 3362 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3363 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3364 3365 ptr = tcg_temp_new_ptr(); 3366 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3367 3368 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc)); 3369 do_pred_flags(t2); 3370 return true; 3371 } 3372 3373 /* 3374 *** SVE Integer Wide Immediate - Unpredicated Group 3375 */ 3376 3377 static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3378 { 3379 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3380 return false; 3381 } 3382 if (sve_access_check(s)) { 3383 unsigned vsz = vec_full_reg_size(s); 3384 int dofs = vec_full_reg_offset(s, a->rd); 3385 uint64_t imm; 3386 3387 /* Decode the VFP immediate. */ 3388 imm = vfp_expand_imm(a->esz, a->imm); 3389 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3390 } 3391 return true; 3392 } 3393 3394 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3395 { 3396 if (!dc_isar_feature(aa64_sve, s)) { 3397 return false; 3398 } 3399 if (sve_access_check(s)) { 3400 unsigned vsz = vec_full_reg_size(s); 3401 int dofs = vec_full_reg_offset(s, a->rd); 3402 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3403 } 3404 return true; 3405 } 3406 3407 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a) 3408 3409 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3410 { 3411 a->imm = -a->imm; 3412 return trans_ADD_zzi(s, a); 3413 } 3414 3415 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3416 { 3417 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3418 static const GVecGen2s op[4] = { 3419 { .fni8 = tcg_gen_vec_sub8_i64, 3420 .fniv = tcg_gen_sub_vec, 3421 .fno = gen_helper_sve_subri_b, 3422 .opt_opc = vecop_list, 3423 .vece = MO_8, 3424 .scalar_first = true }, 3425 { .fni8 = tcg_gen_vec_sub16_i64, 3426 .fniv = tcg_gen_sub_vec, 3427 .fno = gen_helper_sve_subri_h, 3428 .opt_opc = vecop_list, 3429 .vece = MO_16, 3430 .scalar_first = true }, 3431 { .fni4 = tcg_gen_sub_i32, 3432 .fniv = tcg_gen_sub_vec, 3433 .fno = gen_helper_sve_subri_s, 3434 .opt_opc = vecop_list, 3435 .vece = MO_32, 3436 .scalar_first = true }, 3437 { .fni8 = tcg_gen_sub_i64, 3438 .fniv = tcg_gen_sub_vec, 3439 .fno = gen_helper_sve_subri_d, 3440 .opt_opc = vecop_list, 3441 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3442 .vece = MO_64, 3443 .scalar_first = true } 3444 }; 3445 3446 if (!dc_isar_feature(aa64_sve, s)) { 3447 return false; 3448 } 3449 if (sve_access_check(s)) { 3450 unsigned vsz = vec_full_reg_size(s); 3451 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3452 vec_full_reg_offset(s, a->rn), 3453 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]); 3454 } 3455 return true; 3456 } 3457 3458 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a) 3459 3460 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3461 { 3462 if (sve_access_check(s)) { 3463 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, 3464 tcg_constant_i64(a->imm), u, d); 3465 } 3466 return true; 3467 } 3468 3469 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false) 3470 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false) 3471 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true) 3472 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true) 3473 3474 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn) 3475 { 3476 if (sve_access_check(s)) { 3477 unsigned vsz = vec_full_reg_size(s); 
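        /* The immediate is passed to the helper at run time as an i64
         * operand rather than being folded into the simd descriptor. */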
3478 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3479 vec_full_reg_offset(s, a->rn), 3480 tcg_constant_i64(a->imm), vsz, vsz, 0, fn); 3481 } 3482 return true; 3483 } 3484 3485 #define DO_ZZI(NAME, name) \ 3486 static gen_helper_gvec_2i * const name##i_fns[4] = { \ 3487 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3488 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3489 }; \ 3490 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz]) 3491 3492 DO_ZZI(SMAX, smax) 3493 DO_ZZI(UMAX, umax) 3494 DO_ZZI(SMIN, smin) 3495 DO_ZZI(UMIN, umin) 3496 3497 #undef DO_ZZI 3498 3499 static gen_helper_gvec_4 * const dot_fns[2][2] = { 3500 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3501 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3502 }; 3503 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz, 3504 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0) 3505 3506 /* 3507 * SVE Multiply - Indexed 3508 */ 3509 3510 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3511 gen_helper_gvec_sdot_idx_b, a) 3512 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3513 gen_helper_gvec_sdot_idx_h, a) 3514 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz, 3515 gen_helper_gvec_udot_idx_b, a) 3516 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz, 3517 gen_helper_gvec_udot_idx_h, a) 3518 3519 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3520 gen_helper_gvec_sudot_idx_b, a) 3521 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz, 3522 gen_helper_gvec_usdot_idx_b, a) 3523 3524 #define DO_SVE2_RRX(NAME, FUNC) \ 3525 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3526 a->rd, a->rn, a->rm, a->index) 3527 3528 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h) 3529 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s) 3530 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d) 3531 3532 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 3533 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 3534 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 3535 3536 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 3537 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 3538 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 3539 3540 #undef DO_SVE2_RRX 3541 3542 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 3543 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \ 3544 a->rd, a->rn, a->rm, (a->index << 1) | TOP) 3545 3546 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 3547 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 3548 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 3549 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 3550 3551 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 3552 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 3553 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 3554 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 3555 3556 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 3557 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 3558 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 3559 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 3560 3561 #undef DO_SVE2_RRX_TB 3562 3563 #define DO_SVE2_RRXR(NAME, FUNC) \ 3564 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a) 3565 3566 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 3567 DO_SVE2_RRXR(MLA_zzxz_s, 
gen_helper_gvec_mla_idx_s) 3568 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 3569 3570 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 3571 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 3572 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 3573 3574 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 3575 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 3576 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 3577 3578 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 3579 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 3580 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 3581 3582 #undef DO_SVE2_RRXR 3583 3584 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 3585 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3586 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP) 3587 3588 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 3589 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 3590 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 3591 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 3592 3593 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 3594 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 3595 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 3596 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 3597 3598 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 3599 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 3600 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 3601 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 3602 3603 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 3604 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 3605 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 3606 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 3607 3608 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 3609 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 3610 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 3611 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 3612 3613 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 3614 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 3615 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 3616 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 3617 3618 #undef DO_SVE2_RRXR_TB 3619 3620 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 3621 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \ 3622 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot) 3623 3624 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 3625 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 3626 3627 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 3628 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 3629 3630 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 3631 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 3632 3633 #undef DO_SVE2_RRXR_ROT 3634 3635 /* 3636 *** SVE Floating Point Multiply-Add Indexed Group 3637 */ 3638 3639 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 3640 { 3641 static gen_helper_gvec_4_ptr * const fns[4] = { 3642 NULL, 3643 
gen_helper_gvec_fmla_idx_h, 3644 gen_helper_gvec_fmla_idx_s, 3645 gen_helper_gvec_fmla_idx_d, 3646 }; 3647 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, 3648 (a->index << 1) | sub, 3649 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3650 } 3651 3652 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false) 3653 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true) 3654 3655 /* 3656 *** SVE Floating Point Multiply Indexed Group 3657 */ 3658 3659 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = { 3660 NULL, gen_helper_gvec_fmul_idx_h, 3661 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d, 3662 }; 3663 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz, 3664 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index, 3665 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3666 3667 /* 3668 *** SVE Floating Point Fast Reduction Group 3669 */ 3670 3671 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 3672 TCGv_ptr, TCGv_i32); 3673 3674 static bool do_reduce(DisasContext *s, arg_rpr_esz *a, 3675 gen_helper_fp_reduce *fn) 3676 { 3677 unsigned vsz, p2vsz; 3678 TCGv_i32 t_desc; 3679 TCGv_ptr t_zn, t_pg, status; 3680 TCGv_i64 temp; 3681 3682 if (fn == NULL) { 3683 return false; 3684 } 3685 if (!sve_access_check(s)) { 3686 return true; 3687 } 3688 3689 vsz = vec_full_reg_size(s); 3690 p2vsz = pow2ceil(vsz); 3691 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz)); 3692 temp = tcg_temp_new_i64(); 3693 t_zn = tcg_temp_new_ptr(); 3694 t_pg = tcg_temp_new_ptr(); 3695 3696 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 3697 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3698 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3699 3700 fn(temp, t_zn, t_pg, status, t_desc); 3701 3702 write_fp_dreg(s, a->rd, temp); 3703 return true; 3704 } 3705 3706 #define DO_VPZ(NAME, name) \ 3707 static gen_helper_fp_reduce * const name##_fns[4] = { \ 3708 NULL, gen_helper_sve_##name##_h, \ 3709 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3710 }; \ 3711 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz]) 3712 3713 DO_VPZ(FADDV, faddv) 3714 DO_VPZ(FMINNMV, fminnmv) 3715 DO_VPZ(FMAXNMV, fmaxnmv) 3716 DO_VPZ(FMINV, fminv) 3717 DO_VPZ(FMAXV, fmaxv) 3718 3719 #undef DO_VPZ 3720 3721 /* 3722 *** SVE Floating Point Unary Operations - Unpredicated Group 3723 */ 3724 3725 static gen_helper_gvec_2_ptr * const frecpe_fns[] = { 3726 NULL, gen_helper_gvec_frecpe_h, 3727 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, 3728 }; 3729 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0) 3730 3731 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = { 3732 NULL, gen_helper_gvec_frsqrte_h, 3733 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, 3734 }; 3735 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0) 3736 3737 /* 3738 *** SVE Floating Point Compare with Zero Group 3739 */ 3740 3741 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 3742 gen_helper_gvec_3_ptr *fn) 3743 { 3744 if (fn == NULL) { 3745 return false; 3746 } 3747 if (sve_access_check(s)) { 3748 unsigned vsz = vec_full_reg_size(s); 3749 TCGv_ptr status = 3750 fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3751 3752 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 3753 vec_full_reg_offset(s, a->rn), 3754 pred_full_reg_offset(s, a->pg), 3755 status, vsz, vsz, 0, fn); 3756 } 3757 return true; 3758 } 3759 3760 #define DO_PPZ(NAME, name) \ 3761 static gen_helper_gvec_3_ptr * const name##_fns[] = { \ 3762 NULL, gen_helper_sve_##name##_h, \ 3763 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ 3764 }; \ 3765 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz]) 3766 3767 DO_PPZ(FCMGE_ppz0, fcmge0) 3768 DO_PPZ(FCMGT_ppz0, fcmgt0) 3769 DO_PPZ(FCMLE_ppz0, fcmle0) 3770 DO_PPZ(FCMLT_ppz0, fcmlt0) 3771 DO_PPZ(FCMEQ_ppz0, fcmeq0) 3772 DO_PPZ(FCMNE_ppz0, fcmne0) 3773 3774 #undef DO_PPZ 3775 3776 /* 3777 *** SVE floating-point trig multiply-add coefficient 3778 */ 3779 3780 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = { 3781 NULL, gen_helper_sve_ftmad_h, 3782 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d, 3783 }; 3784 TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz, 3785 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm, 3786 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3787 3788 /* 3789 *** SVE Floating Point Accumulating Reduction Group 3790 */ 3791 3792 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 3793 { 3794 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 3795 TCGv_ptr, TCGv_ptr, TCGv_i32); 3796 static fadda_fn * const fns[3] = { 3797 gen_helper_sve_fadda_h, 3798 gen_helper_sve_fadda_s, 3799 gen_helper_sve_fadda_d, 3800 }; 3801 unsigned vsz = vec_full_reg_size(s); 3802 TCGv_ptr t_rm, t_pg, t_fpst; 3803 TCGv_i64 t_val; 3804 TCGv_i32 t_desc; 3805 3806 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { 3807 return false; 3808 } 3809 s->is_nonstreaming = true; 3810 if (!sve_access_check(s)) { 3811 return true; 3812 } 3813 3814 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 3815 t_rm = tcg_temp_new_ptr(); 3816 t_pg = tcg_temp_new_ptr(); 3817 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm)); 3818 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3819 t_fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3820 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3821 3822 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 3823 3824 write_fp_dreg(s, a->rd, t_val); 3825 return true; 3826 } 3827 3828 /* 3829 *** SVE Floating Point Arithmetic - Unpredicated Group 3830 */ 3831 3832 #define DO_FP3(NAME, name) \ 3833 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \ 3834 NULL, gen_helper_gvec_##name##_h, \ 3835 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 3836 }; \ 3837 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0) 3838 3839 DO_FP3(FADD_zzz, fadd) 3840 DO_FP3(FSUB_zzz, fsub) 3841 DO_FP3(FMUL_zzz, fmul) 3842 DO_FP3(FRECPS, recps) 3843 DO_FP3(FRSQRTS, rsqrts) 3844 3845 #undef DO_FP3 3846 3847 static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = { 3848 NULL, gen_helper_gvec_ftsmul_h, 3849 gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d 3850 }; 3851 TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz, 3852 ftsmul_fns[a->esz], a, 0) 3853 3854 /* 3855 *** SVE Floating Point Arithmetic - Predicated Group 3856 */ 3857 3858 #define DO_ZPZZ_FP(NAME, FEAT, name) \ 3859 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ 3860 NULL, gen_helper_##name##_h, \ 3861 gen_helper_##name##_s, gen_helper_##name##_d \ 3862 }; \ 3863 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) 3864 3865 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) 3866 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) 3867 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) 3868 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) 3869 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) 3870 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) 3871 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) 3872 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) 3873 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) 3874 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) 3875 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) 3876 3877 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 3878 TCGv_i64, TCGv_ptr, TCGv_i32); 3879 3880 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 3881 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 3882 { 3883 unsigned vsz = vec_full_reg_size(s); 3884 TCGv_ptr t_zd, t_zn, t_pg, status; 3885 TCGv_i32 desc; 3886 3887 t_zd = tcg_temp_new_ptr(); 3888 t_zn = tcg_temp_new_ptr(); 3889 t_pg = tcg_temp_new_ptr(); 3890 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd)); 3891 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn)); 3892 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3893 3894 status = fpstatus_ptr(is_fp16 ? 
FPST_FPCR_F16 : FPST_FPCR); 3895 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); 3896 fn(t_zd, t_zn, t_pg, scalar, status, desc); 3897 } 3898 3899 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 3900 gen_helper_sve_fp2scalar *fn) 3901 { 3902 if (fn == NULL) { 3903 return false; 3904 } 3905 if (sve_access_check(s)) { 3906 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, 3907 tcg_constant_i64(imm), fn); 3908 } 3909 return true; 3910 } 3911 3912 #define DO_FP_IMM(NAME, name, const0, const1) \ 3913 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ 3914 NULL, gen_helper_sve_##name##_h, \ 3915 gen_helper_sve_##name##_s, \ 3916 gen_helper_sve_##name##_d \ 3917 }; \ 3918 static uint64_t const name##_const[4][2] = { \ 3919 { -1, -1 }, \ 3920 { float16_##const0, float16_##const1 }, \ 3921 { float32_##const0, float32_##const1 }, \ 3922 { float64_##const0, float64_##const1 }, \ 3923 }; \ 3924 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ 3925 name##_const[a->esz][a->imm], name##_fns[a->esz]) 3926 3927 DO_FP_IMM(FADD, fadds, half, one) 3928 DO_FP_IMM(FSUB, fsubs, half, one) 3929 DO_FP_IMM(FMUL, fmuls, half, two) 3930 DO_FP_IMM(FSUBR, fsubrs, half, one) 3931 DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 3932 DO_FP_IMM(FMINNM, fminnms, zero, one) 3933 DO_FP_IMM(FMAX, fmaxs, zero, one) 3934 DO_FP_IMM(FMIN, fmins, zero, one) 3935 3936 #undef DO_FP_IMM 3937 3938 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 3939 gen_helper_gvec_4_ptr *fn) 3940 { 3941 if (fn == NULL) { 3942 return false; 3943 } 3944 if (sve_access_check(s)) { 3945 unsigned vsz = vec_full_reg_size(s); 3946 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 3947 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 3948 vec_full_reg_offset(s, a->rn), 3949 vec_full_reg_offset(s, a->rm), 3950 pred_full_reg_offset(s, a->pg), 3951 status, vsz, vsz, 0, fn); 3952 } 3953 return true; 3954 } 3955 3956 #define DO_FPCMP(NAME, name) \ 3957 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ 3958 NULL, gen_helper_sve_##name##_h, \ 3959 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3960 }; \ 3961 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) 3962 3963 DO_FPCMP(FCMGE, fcmge) 3964 DO_FPCMP(FCMGT, fcmgt) 3965 DO_FPCMP(FCMEQ, fcmeq) 3966 DO_FPCMP(FCMNE, fcmne) 3967 DO_FPCMP(FCMUO, fcmuo) 3968 DO_FPCMP(FACGE, facge) 3969 DO_FPCMP(FACGT, facgt) 3970 3971 #undef DO_FPCMP 3972 3973 static gen_helper_gvec_4_ptr * const fcadd_fns[] = { 3974 NULL, gen_helper_sve_fcadd_h, 3975 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, 3976 }; 3977 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], 3978 a->rd, a->rn, a->rm, a->pg, a->rot, 3979 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 3980 3981 #define DO_FMLA(NAME, name) \ 3982 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ 3983 NULL, gen_helper_sve_##name##_h, \ 3984 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 3985 }; \ 3986 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ 3987 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ 3988 a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 3989 3990 DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 3991 DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 3992 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 3993 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 3994 3995 #undef DO_FMLA 3996 3997 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { 3998 NULL, gen_helper_sve_fcmla_zpzzz_h, 3999 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, 4000 }; 4001 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], 4002 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, 4003 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4004 4005 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { 4006 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL 4007 }; 4008 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], 4009 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, 4010 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4011 4012 /* 4013 *** SVE Floating Point Unary Operations Predicated Group 4014 */ 4015 4016 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4017 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) 4018 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4019 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) 4020 4021 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 4022 gen_helper_sve_bfcvt, a, 0, FPST_FPCR) 4023 4024 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4025 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) 4026 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4027 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) 4028 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4029 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) 4030 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4031 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) 4032 4033 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4034 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) 4035 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4036 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) 4037 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4038 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) 4039 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, 4040 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) 4041 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4042 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) 4043 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, 4044 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) 4045 4046 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4047 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) 4048 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4049 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) 4050 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4051 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) 4052 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4053 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) 4054 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4055 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) 4056 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4057 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) 4058 4059 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4060 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) 4061 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4062 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) 4063 4064 static gen_helper_gvec_3_ptr * const frint_fns[] = { 4065 NULL, 4066 gen_helper_sve_frint_h, 4067 gen_helper_sve_frint_s, 4068 gen_helper_sve_frint_d 4069 }; 4070 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], 4071 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 4072 4073 static gen_helper_gvec_3_ptr * const frintx_fns[] = { 4074 NULL, 4075 gen_helper_sve_frintx_h, 4076 gen_helper_sve_frintx_s, 4077 gen_helper_sve_frintx_d 4078 }; 4079 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], 4080 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4081 4082 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 4083 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) 4084 { 4085 unsigned vsz; 4086 TCGv_i32 tmode; 4087 TCGv_ptr status; 4088 4089 if (fn == NULL) { 4090 return false; 4091 } 4092 if (!sve_access_check(s)) { 4093 return true; 4094 } 4095 4096 vsz = vec_full_reg_size(s); 4097 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4098 tmode = gen_set_rmode(mode, status); 4099 4100 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4101 vec_full_reg_offset(s, a->rn), 4102 pred_full_reg_offset(s, a->pg), 4103 status, vsz, vsz, 0, fn); 4104 4105 gen_restore_rmode(tmode, status); 4106 return true; 4107 } 4108 4109 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, 4110 FPROUNDING_TIEEVEN, frint_fns[a->esz]) 4111 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, 4112 FPROUNDING_POSINF, frint_fns[a->esz]) 4113 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, 4114 FPROUNDING_NEGINF, frint_fns[a->esz]) 4115 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, 4116 FPROUNDING_ZERO, frint_fns[a->esz]) 4117 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, 4118 FPROUNDING_TIEAWAY, frint_fns[a->esz]) 4119 4120 static gen_helper_gvec_3_ptr * const frecpx_fns[] = { 4121 NULL, gen_helper_sve_frecpx_h, 4122 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, 4123 }; 4124 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], 4125 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) 4126 4127 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { 4128 NULL, gen_helper_sve_fsqrt_h, 4129 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, 4130 }; 4131 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], 4132 a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) 4133 4134 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4135 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) 4136 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4137 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) 4138 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4139 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) 4140 4141 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4142 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) 4143 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4144 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) 4145 4146 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4147 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) 4148 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4149 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) 4150 4151 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, 4152 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) 4153 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, 4154 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) 4155 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, 4156 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) 4157 4158 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, 4159 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) 4160 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, 4161 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) 4162 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, 4163 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) 4164 4165 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, 4166 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) 4167 4168 /* 4169 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 4170 */ 4171 4172 /* Subroutine loading a vector register at VOFS of LEN bytes. 4173 * The load should begin at the address Rn + IMM. 4174 */ 4175 4176 void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, 4177 int len, int rn, int imm) 4178 { 4179 int len_align = QEMU_ALIGN_DOWN(len, 8); 4180 int len_remain = len % 8; 4181 int nparts = len / 8 + ctpop8(len_remain); 4182 int midx = get_mem_index(s); 4183 TCGv_i64 dirty_addr, clean_addr, t0, t1; 4184 4185 dirty_addr = tcg_temp_new_i64(); 4186 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4187 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 4188 4189 /* 4190 * Note that unpredicated load/store of vector/predicate registers 4191 * are defined as a stream of bytes, which equates to little-endian 4192 * operations on larger quantities. 4193 * Attempt to keep code expansion to a minimum by limiting the 4194 * amount of unrolling done. 4195 */ 4196 if (nparts <= 4) { 4197 int i; 4198 4199 t0 = tcg_temp_new_i64(); 4200 for (i = 0; i < len_align; i += 8) { 4201 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); 4202 tcg_gen_st_i64(t0, base, vofs + i); 4203 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4204 } 4205 } else { 4206 TCGLabel *loop = gen_new_label(); 4207 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4208 4209 tcg_gen_movi_ptr(i, 0); 4210 gen_set_label(loop); 4211 4212 t0 = tcg_temp_new_i64(); 4213 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ); 4214 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4215 4216 tp = tcg_temp_new_ptr(); 4217 tcg_gen_add_ptr(tp, base, i); 4218 tcg_gen_addi_ptr(i, i, 8); 4219 tcg_gen_st_i64(t0, tp, vofs); 4220 4221 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4222 } 4223 4224 /* 4225 * Predicate register loads can be any multiple of 2. 4226 * Note that we still store the entire 64-bit unit into cpu_env. 
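 * Any bytes beyond len_remain within that final unit are zero, because the partial loads below zero-extend into the 64-bit temporary.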
4227 */ 4228 if (len_remain) { 4229 t0 = tcg_temp_new_i64(); 4230 switch (len_remain) { 4231 case 2: 4232 case 4: 4233 case 8: 4234 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 4235 MO_LE | ctz32(len_remain)); 4236 break; 4237 4238 case 6: 4239 t1 = tcg_temp_new_i64(); 4240 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL); 4241 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4242 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW); 4243 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 4244 break; 4245 4246 default: 4247 g_assert_not_reached(); 4248 } 4249 tcg_gen_st_i64(t0, base, vofs + len_align); 4250 } 4251 } 4252 4253 /* Similarly for stores. */ 4254 void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, 4255 int len, int rn, int imm) 4256 { 4257 int len_align = QEMU_ALIGN_DOWN(len, 8); 4258 int len_remain = len % 8; 4259 int nparts = len / 8 + ctpop8(len_remain); 4260 int midx = get_mem_index(s); 4261 TCGv_i64 dirty_addr, clean_addr, t0; 4262 4263 dirty_addr = tcg_temp_new_i64(); 4264 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 4265 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 4266 4267 /* Note that unpredicated load/store of vector/predicate registers 4268 * are defined as a stream of bytes, which equates to little-endian 4269 * operations on larger quantities. There is no nice way to force 4270 * a little-endian store for aarch64_be-linux-user out of line. 4271 * 4272 * Attempt to keep code expansion to a minimum by limiting the 4273 * amount of unrolling done. 4274 */ 4275 if (nparts <= 4) { 4276 int i; 4277 4278 t0 = tcg_temp_new_i64(); 4279 for (i = 0; i < len_align; i += 8) { 4280 tcg_gen_ld_i64(t0, base, vofs + i); 4281 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); 4282 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4283 } 4284 } else { 4285 TCGLabel *loop = gen_new_label(); 4286 TCGv_ptr tp, i = tcg_temp_new_ptr(); 4287 4288 tcg_gen_movi_ptr(i, 0); 4289 gen_set_label(loop); 4290 4291 t0 = tcg_temp_new_i64(); 4292 tp = tcg_temp_new_ptr(); 4293 tcg_gen_add_ptr(tp, base, i); 4294 tcg_gen_ld_i64(t0, tp, vofs); 4295 tcg_gen_addi_ptr(i, i, 8); 4296 4297 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ); 4298 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4299 4300 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 4301 } 4302 4303 /* Predicate register stores can be any multiple of 2. 
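Only the low len_remain bytes of that final 64-bit unit are written back to memory.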
*/ 4304 if (len_remain) { 4305 t0 = tcg_temp_new_i64(); 4306 tcg_gen_ld_i64(t0, base, vofs + len_align); 4307 4308 switch (len_remain) { 4309 case 2: 4310 case 4: 4311 case 8: 4312 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 4313 MO_LE | ctz32(len_remain)); 4314 break; 4315 4316 case 6: 4317 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); 4318 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 4319 tcg_gen_shri_i64(t0, t0, 32); 4320 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); 4321 break; 4322 4323 default: 4324 g_assert_not_reached(); 4325 } 4326 } 4327 } 4328 4329 static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 4330 { 4331 if (!dc_isar_feature(aa64_sve, s)) { 4332 return false; 4333 } 4334 if (sve_access_check(s)) { 4335 int size = vec_full_reg_size(s); 4336 int off = vec_full_reg_offset(s, a->rd); 4337 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4338 } 4339 return true; 4340 } 4341 4342 static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 4343 { 4344 if (!dc_isar_feature(aa64_sve, s)) { 4345 return false; 4346 } 4347 if (sve_access_check(s)) { 4348 int size = pred_full_reg_size(s); 4349 int off = pred_full_reg_offset(s, a->rd); 4350 gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size); 4351 } 4352 return true; 4353 } 4354 4355 static bool trans_STR_zri(DisasContext *s, arg_rri *a) 4356 { 4357 if (!dc_isar_feature(aa64_sve, s)) { 4358 return false; 4359 } 4360 if (sve_access_check(s)) { 4361 int size = vec_full_reg_size(s); 4362 int off = vec_full_reg_offset(s, a->rd); 4363 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4364 } 4365 return true; 4366 } 4367 4368 static bool trans_STR_pri(DisasContext *s, arg_rri *a) 4369 { 4370 if (!dc_isar_feature(aa64_sve, s)) { 4371 return false; 4372 } 4373 if (sve_access_check(s)) { 4374 int size = pred_full_reg_size(s); 4375 int off = pred_full_reg_offset(s, a->rd); 4376 gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size); 4377 } 4378 return true; 4379 } 4380 4381 /* 4382 *** SVE Memory - Contiguous Load Group 4383 */ 4384 4385 /* The memory mode of the dtype. */ 4386 static const MemOp dtype_mop[16] = { 4387 MO_UB, MO_UB, MO_UB, MO_UB, 4388 MO_SL, MO_UW, MO_UW, MO_UW, 4389 MO_SW, MO_SW, MO_UL, MO_UL, 4390 MO_SB, MO_SB, MO_SB, MO_UQ 4391 }; 4392 4393 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 4394 4395 /* The vector element size of dtype. */ 4396 static const uint8_t dtype_esz[16] = { 4397 0, 1, 2, 3, 4398 3, 1, 2, 3, 4399 3, 2, 2, 3, 4400 3, 2, 1, 3 4401 }; 4402 4403 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 4404 int dtype, uint32_t mte_n, bool is_write, 4405 gen_helper_gvec_mem *fn) 4406 { 4407 unsigned vsz = vec_full_reg_size(s); 4408 TCGv_ptr t_pg; 4409 int desc = 0; 4410 4411 /* 4412 * For e.g. LD4, there are not enough arguments to pass all 4 4413 * registers as pointers, so encode the regno into the data field. 4414 * For consistency, do this even for LD1. 
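 * When MTE is active, the MTE descriptor bits share this data field, packed above SVE_MTEDESC_SHIFT so that the low bits remain free for the register number.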
4415 */ 4416 if (s->mte_active[0]) { 4417 int msz = dtype_msz(dtype); 4418 4419 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 4420 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4421 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4422 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 4423 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); 4424 desc <<= SVE_MTEDESC_SHIFT; 4425 } else { 4426 addr = clean_data_tbi(s, addr); 4427 } 4428 4429 desc = simd_desc(vsz, vsz, zt | desc); 4430 t_pg = tcg_temp_new_ptr(); 4431 4432 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 4433 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc)); 4434 } 4435 4436 /* Indexed by [mte][be][dtype][nreg] */ 4437 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 4438 { /* mte inactive, little-endian */ 4439 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4440 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4441 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4442 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4443 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4444 4445 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 4446 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 4447 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 4448 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 4449 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 4450 4451 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 4452 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 4453 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 4454 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 4455 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 4456 4457 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4458 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4459 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4460 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 4461 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 4462 4463 /* mte inactive, big-endian */ 4464 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 4465 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 4466 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 4467 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 4468 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 4469 4470 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 4471 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 4472 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 4473 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 4474 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 4475 4476 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 4477 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 4478 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 4479 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 4480 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 4481 4482 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 4483 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 4484 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 4485 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 4486 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 4487 4488 { /* mte active, little-endian */ 4489 { { gen_helper_sve_ld1bb_r_mte, 4490 gen_helper_sve_ld2bb_r_mte, 4491 gen_helper_sve_ld3bb_r_mte, 4492 gen_helper_sve_ld4bb_r_mte }, 4493 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4494 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4495 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4496 4497 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, 
NULL }, 4498 { gen_helper_sve_ld1hh_le_r_mte, 4499 gen_helper_sve_ld2hh_le_r_mte, 4500 gen_helper_sve_ld3hh_le_r_mte, 4501 gen_helper_sve_ld4hh_le_r_mte }, 4502 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 4503 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 4504 4505 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 4506 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 4507 { gen_helper_sve_ld1ss_le_r_mte, 4508 gen_helper_sve_ld2ss_le_r_mte, 4509 gen_helper_sve_ld3ss_le_r_mte, 4510 gen_helper_sve_ld4ss_le_r_mte }, 4511 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 4512 4513 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4514 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4515 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4516 { gen_helper_sve_ld1dd_le_r_mte, 4517 gen_helper_sve_ld2dd_le_r_mte, 4518 gen_helper_sve_ld3dd_le_r_mte, 4519 gen_helper_sve_ld4dd_le_r_mte } }, 4520 4521 /* mte active, big-endian */ 4522 { { gen_helper_sve_ld1bb_r_mte, 4523 gen_helper_sve_ld2bb_r_mte, 4524 gen_helper_sve_ld3bb_r_mte, 4525 gen_helper_sve_ld4bb_r_mte }, 4526 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 4527 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 4528 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 4529 4530 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 4531 { gen_helper_sve_ld1hh_be_r_mte, 4532 gen_helper_sve_ld2hh_be_r_mte, 4533 gen_helper_sve_ld3hh_be_r_mte, 4534 gen_helper_sve_ld4hh_be_r_mte }, 4535 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 4536 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 4537 4538 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 4539 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 4540 { gen_helper_sve_ld1ss_be_r_mte, 4541 gen_helper_sve_ld2ss_be_r_mte, 4542 gen_helper_sve_ld3ss_be_r_mte, 4543 gen_helper_sve_ld4ss_be_r_mte }, 4544 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 4545 4546 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 4547 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 4548 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 4549 { gen_helper_sve_ld1dd_be_r_mte, 4550 gen_helper_sve_ld2dd_be_r_mte, 4551 gen_helper_sve_ld3dd_be_r_mte, 4552 gen_helper_sve_ld4dd_be_r_mte } } }, 4553 }; 4554 4555 static void do_ld_zpa(DisasContext *s, int zt, int pg, 4556 TCGv_i64 addr, int dtype, int nreg) 4557 { 4558 gen_helper_gvec_mem *fn 4559 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 4560 4561 /* 4562 * While there are holes in the table, they are not 4563 * accessible via the instruction encoding. 
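 * The NULL entries are the extending loads (dtype with msz != esz), which exist only in single-register form; LD2/LD3/LD4 provide only the non-extending dtypes.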
4564 */ 4565 assert(fn != NULL); 4566 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); 4567 } 4568 4569 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 4570 { 4571 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4572 return false; 4573 } 4574 if (sve_access_check(s)) { 4575 TCGv_i64 addr = tcg_temp_new_i64(); 4576 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4577 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4578 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4579 } 4580 return true; 4581 } 4582 4583 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 4584 { 4585 if (!dc_isar_feature(aa64_sve, s)) { 4586 return false; 4587 } 4588 if (sve_access_check(s)) { 4589 int vsz = vec_full_reg_size(s); 4590 int elements = vsz >> dtype_esz[a->dtype]; 4591 TCGv_i64 addr = tcg_temp_new_i64(); 4592 4593 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 4594 (a->imm * elements * (a->nreg + 1)) 4595 << dtype_msz(a->dtype)); 4596 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 4597 } 4598 return true; 4599 } 4600 4601 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 4602 { 4603 static gen_helper_gvec_mem * const fns[2][2][16] = { 4604 { /* mte inactive, little-endian */ 4605 { gen_helper_sve_ldff1bb_r, 4606 gen_helper_sve_ldff1bhu_r, 4607 gen_helper_sve_ldff1bsu_r, 4608 gen_helper_sve_ldff1bdu_r, 4609 4610 gen_helper_sve_ldff1sds_le_r, 4611 gen_helper_sve_ldff1hh_le_r, 4612 gen_helper_sve_ldff1hsu_le_r, 4613 gen_helper_sve_ldff1hdu_le_r, 4614 4615 gen_helper_sve_ldff1hds_le_r, 4616 gen_helper_sve_ldff1hss_le_r, 4617 gen_helper_sve_ldff1ss_le_r, 4618 gen_helper_sve_ldff1sdu_le_r, 4619 4620 gen_helper_sve_ldff1bds_r, 4621 gen_helper_sve_ldff1bss_r, 4622 gen_helper_sve_ldff1bhs_r, 4623 gen_helper_sve_ldff1dd_le_r }, 4624 4625 /* mte inactive, big-endian */ 4626 { gen_helper_sve_ldff1bb_r, 4627 gen_helper_sve_ldff1bhu_r, 4628 gen_helper_sve_ldff1bsu_r, 4629 gen_helper_sve_ldff1bdu_r, 4630 4631 gen_helper_sve_ldff1sds_be_r, 4632 gen_helper_sve_ldff1hh_be_r, 4633 gen_helper_sve_ldff1hsu_be_r, 4634 gen_helper_sve_ldff1hdu_be_r, 4635 4636 gen_helper_sve_ldff1hds_be_r, 4637 gen_helper_sve_ldff1hss_be_r, 4638 gen_helper_sve_ldff1ss_be_r, 4639 gen_helper_sve_ldff1sdu_be_r, 4640 4641 gen_helper_sve_ldff1bds_r, 4642 gen_helper_sve_ldff1bss_r, 4643 gen_helper_sve_ldff1bhs_r, 4644 gen_helper_sve_ldff1dd_be_r } }, 4645 4646 { /* mte active, little-endian */ 4647 { gen_helper_sve_ldff1bb_r_mte, 4648 gen_helper_sve_ldff1bhu_r_mte, 4649 gen_helper_sve_ldff1bsu_r_mte, 4650 gen_helper_sve_ldff1bdu_r_mte, 4651 4652 gen_helper_sve_ldff1sds_le_r_mte, 4653 gen_helper_sve_ldff1hh_le_r_mte, 4654 gen_helper_sve_ldff1hsu_le_r_mte, 4655 gen_helper_sve_ldff1hdu_le_r_mte, 4656 4657 gen_helper_sve_ldff1hds_le_r_mte, 4658 gen_helper_sve_ldff1hss_le_r_mte, 4659 gen_helper_sve_ldff1ss_le_r_mte, 4660 gen_helper_sve_ldff1sdu_le_r_mte, 4661 4662 gen_helper_sve_ldff1bds_r_mte, 4663 gen_helper_sve_ldff1bss_r_mte, 4664 gen_helper_sve_ldff1bhs_r_mte, 4665 gen_helper_sve_ldff1dd_le_r_mte }, 4666 4667 /* mte active, big-endian */ 4668 { gen_helper_sve_ldff1bb_r_mte, 4669 gen_helper_sve_ldff1bhu_r_mte, 4670 gen_helper_sve_ldff1bsu_r_mte, 4671 gen_helper_sve_ldff1bdu_r_mte, 4672 4673 gen_helper_sve_ldff1sds_be_r_mte, 4674 gen_helper_sve_ldff1hh_be_r_mte, 4675 gen_helper_sve_ldff1hsu_be_r_mte, 4676 gen_helper_sve_ldff1hdu_be_r_mte, 4677 4678 gen_helper_sve_ldff1hds_be_r_mte, 4679 gen_helper_sve_ldff1hss_be_r_mte, 4680 gen_helper_sve_ldff1ss_be_r_mte, 4681 
gen_helper_sve_ldff1sdu_be_r_mte, 4682 4683 gen_helper_sve_ldff1bds_r_mte, 4684 gen_helper_sve_ldff1bss_r_mte, 4685 gen_helper_sve_ldff1bhs_r_mte, 4686 gen_helper_sve_ldff1dd_be_r_mte } }, 4687 }; 4688 4689 if (!dc_isar_feature(aa64_sve, s)) { 4690 return false; 4691 } 4692 s->is_nonstreaming = true; 4693 if (sve_access_check(s)) { 4694 TCGv_i64 addr = tcg_temp_new_i64(); 4695 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4696 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4697 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4698 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4699 } 4700 return true; 4701 } 4702 4703 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 4704 { 4705 static gen_helper_gvec_mem * const fns[2][2][16] = { 4706 { /* mte inactive, little-endian */ 4707 { gen_helper_sve_ldnf1bb_r, 4708 gen_helper_sve_ldnf1bhu_r, 4709 gen_helper_sve_ldnf1bsu_r, 4710 gen_helper_sve_ldnf1bdu_r, 4711 4712 gen_helper_sve_ldnf1sds_le_r, 4713 gen_helper_sve_ldnf1hh_le_r, 4714 gen_helper_sve_ldnf1hsu_le_r, 4715 gen_helper_sve_ldnf1hdu_le_r, 4716 4717 gen_helper_sve_ldnf1hds_le_r, 4718 gen_helper_sve_ldnf1hss_le_r, 4719 gen_helper_sve_ldnf1ss_le_r, 4720 gen_helper_sve_ldnf1sdu_le_r, 4721 4722 gen_helper_sve_ldnf1bds_r, 4723 gen_helper_sve_ldnf1bss_r, 4724 gen_helper_sve_ldnf1bhs_r, 4725 gen_helper_sve_ldnf1dd_le_r }, 4726 4727 /* mte inactive, big-endian */ 4728 { gen_helper_sve_ldnf1bb_r, 4729 gen_helper_sve_ldnf1bhu_r, 4730 gen_helper_sve_ldnf1bsu_r, 4731 gen_helper_sve_ldnf1bdu_r, 4732 4733 gen_helper_sve_ldnf1sds_be_r, 4734 gen_helper_sve_ldnf1hh_be_r, 4735 gen_helper_sve_ldnf1hsu_be_r, 4736 gen_helper_sve_ldnf1hdu_be_r, 4737 4738 gen_helper_sve_ldnf1hds_be_r, 4739 gen_helper_sve_ldnf1hss_be_r, 4740 gen_helper_sve_ldnf1ss_be_r, 4741 gen_helper_sve_ldnf1sdu_be_r, 4742 4743 gen_helper_sve_ldnf1bds_r, 4744 gen_helper_sve_ldnf1bss_r, 4745 gen_helper_sve_ldnf1bhs_r, 4746 gen_helper_sve_ldnf1dd_be_r } }, 4747 4748 { /* mte active, little-endian */ 4749 { gen_helper_sve_ldnf1bb_r_mte, 4750 gen_helper_sve_ldnf1bhu_r_mte, 4751 gen_helper_sve_ldnf1bsu_r_mte, 4752 gen_helper_sve_ldnf1bdu_r_mte, 4753 4754 gen_helper_sve_ldnf1sds_le_r_mte, 4755 gen_helper_sve_ldnf1hh_le_r_mte, 4756 gen_helper_sve_ldnf1hsu_le_r_mte, 4757 gen_helper_sve_ldnf1hdu_le_r_mte, 4758 4759 gen_helper_sve_ldnf1hds_le_r_mte, 4760 gen_helper_sve_ldnf1hss_le_r_mte, 4761 gen_helper_sve_ldnf1ss_le_r_mte, 4762 gen_helper_sve_ldnf1sdu_le_r_mte, 4763 4764 gen_helper_sve_ldnf1bds_r_mte, 4765 gen_helper_sve_ldnf1bss_r_mte, 4766 gen_helper_sve_ldnf1bhs_r_mte, 4767 gen_helper_sve_ldnf1dd_le_r_mte }, 4768 4769 /* mte active, big-endian */ 4770 { gen_helper_sve_ldnf1bb_r_mte, 4771 gen_helper_sve_ldnf1bhu_r_mte, 4772 gen_helper_sve_ldnf1bsu_r_mte, 4773 gen_helper_sve_ldnf1bdu_r_mte, 4774 4775 gen_helper_sve_ldnf1sds_be_r_mte, 4776 gen_helper_sve_ldnf1hh_be_r_mte, 4777 gen_helper_sve_ldnf1hsu_be_r_mte, 4778 gen_helper_sve_ldnf1hdu_be_r_mte, 4779 4780 gen_helper_sve_ldnf1hds_be_r_mte, 4781 gen_helper_sve_ldnf1hss_be_r_mte, 4782 gen_helper_sve_ldnf1ss_be_r_mte, 4783 gen_helper_sve_ldnf1sdu_be_r_mte, 4784 4785 gen_helper_sve_ldnf1bds_r_mte, 4786 gen_helper_sve_ldnf1bss_r_mte, 4787 gen_helper_sve_ldnf1bhs_r_mte, 4788 gen_helper_sve_ldnf1dd_be_r_mte } }, 4789 }; 4790 4791 if (!dc_isar_feature(aa64_sve, s)) { 4792 return false; 4793 } 4794 s->is_nonstreaming = true; 4795 if (sve_access_check(s)) { 4796 int vsz = vec_full_reg_size(s); 4797 int elements = vsz >> dtype_esz[a->dtype]; 4798 int off = 
(a->imm * elements) << dtype_msz(a->dtype); 4799 TCGv_i64 addr = tcg_temp_new_i64(); 4800 4801 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 4802 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 4803 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 4804 } 4805 return true; 4806 } 4807 4808 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4809 { 4810 unsigned vsz = vec_full_reg_size(s); 4811 TCGv_ptr t_pg; 4812 int poff; 4813 4814 /* Load the first quadword using the normal predicated load helpers. */ 4815 poff = pred_full_reg_offset(s, pg); 4816 if (vsz > 16) { 4817 /* 4818 * Zero-extend the first 16 bits of the predicate into a temporary. 4819 * This avoids triggering an assert making sure we don't have bits 4820 * set within a predicate beyond VQ, but we have lowered VQ to 1 4821 * for this load operation. 4822 */ 4823 TCGv_i64 tmp = tcg_temp_new_i64(); 4824 #if HOST_BIG_ENDIAN 4825 poff += 6; 4826 #endif 4827 tcg_gen_ld16u_i64(tmp, cpu_env, poff); 4828 4829 poff = offsetof(CPUARMState, vfp.preg_tmp); 4830 tcg_gen_st_i64(tmp, cpu_env, poff); 4831 } 4832 4833 t_pg = tcg_temp_new_ptr(); 4834 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 4835 4836 gen_helper_gvec_mem *fn 4837 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4838 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt))); 4839 4840 /* Replicate that first quadword. */ 4841 if (vsz > 16) { 4842 int doff = vec_full_reg_offset(s, zt); 4843 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 4844 } 4845 } 4846 4847 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 4848 { 4849 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { 4850 return false; 4851 } 4852 if (sve_access_check(s)) { 4853 int msz = dtype_msz(a->dtype); 4854 TCGv_i64 addr = tcg_temp_new_i64(); 4855 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 4856 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4857 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4858 } 4859 return true; 4860 } 4861 4862 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 4863 { 4864 if (!dc_isar_feature(aa64_sve, s)) { 4865 return false; 4866 } 4867 if (sve_access_check(s)) { 4868 TCGv_i64 addr = tcg_temp_new_i64(); 4869 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 4870 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 4871 } 4872 return true; 4873 } 4874 4875 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 4876 { 4877 unsigned vsz = vec_full_reg_size(s); 4878 unsigned vsz_r32; 4879 TCGv_ptr t_pg; 4880 int poff, doff; 4881 4882 if (vsz < 32) { 4883 /* 4884 * Note that this UNDEFINED check comes after CheckSVEEnabled() 4885 * in the ARM pseudocode, which is the sve_access_check() done 4886 * in our caller. We should not now return false from the caller. 4887 */ 4888 unallocated_encoding(s); 4889 return; 4890 } 4891 4892 /* Load the first octaword using the normal predicated load helpers. */ 4893 4894 poff = pred_full_reg_offset(s, pg); 4895 if (vsz > 32) { 4896 /* 4897 * Zero-extend the first 32 bits of the predicate into a temporary. 4898 * This avoids triggering an assert making sure we don't have bits 4899 * set within a predicate beyond VQ, but we have lowered VQ to 2 4900 * for this load operation. 
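 * The 32 predicate bits copied to vfp.preg_tmp govern exactly the 32 bytes (VQ == 2) loaded by the helper below.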
4901 */ 4902 TCGv_i64 tmp = tcg_temp_new_i64(); 4903 #if HOST_BIG_ENDIAN 4904 poff += 4; 4905 #endif 4906 tcg_gen_ld32u_i64(tmp, cpu_env, poff); 4907 4908 poff = offsetof(CPUARMState, vfp.preg_tmp); 4909 tcg_gen_st_i64(tmp, cpu_env, poff); 4910 } 4911 4912 t_pg = tcg_temp_new_ptr(); 4913 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 4914 4915 gen_helper_gvec_mem *fn 4916 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 4917 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt))); 4918 4919 /* 4920 * Replicate that first octaword. 4921 * The replication happens in units of 32; if the full vector size 4922 * is not a multiple of 32, the final bits are zeroed. 4923 */ 4924 doff = vec_full_reg_offset(s, zt); 4925 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 4926 if (vsz >= 64) { 4927 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 4928 } 4929 vsz -= vsz_r32; 4930 if (vsz) { 4931 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 4932 } 4933 } 4934 4935 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 4936 { 4937 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4938 return false; 4939 } 4940 if (a->rm == 31) { 4941 return false; 4942 } 4943 s->is_nonstreaming = true; 4944 if (sve_access_check(s)) { 4945 TCGv_i64 addr = tcg_temp_new_i64(); 4946 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 4947 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 4948 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4949 } 4950 return true; 4951 } 4952 4953 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 4954 { 4955 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 4956 return false; 4957 } 4958 s->is_nonstreaming = true; 4959 if (sve_access_check(s)) { 4960 TCGv_i64 addr = tcg_temp_new_i64(); 4961 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 4962 do_ldro(s, a->rd, a->pg, addr, a->dtype); 4963 } 4964 return true; 4965 } 4966 4967 /* Load and broadcast element. */ 4968 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 4969 { 4970 unsigned vsz = vec_full_reg_size(s); 4971 unsigned psz = pred_full_reg_size(s); 4972 unsigned esz = dtype_esz[a->dtype]; 4973 unsigned msz = dtype_msz(a->dtype); 4974 TCGLabel *over; 4975 TCGv_i64 temp, clean_addr; 4976 4977 if (!dc_isar_feature(aa64_sve, s)) { 4978 return false; 4979 } 4980 if (!sve_access_check(s)) { 4981 return true; 4982 } 4983 4984 over = gen_new_label(); 4985 4986 /* If the guarding predicate has no bits set, no load occurs. */ 4987 if (psz <= 8) { 4988 /* Reduce the pred_esz_masks value simply to reduce the 4989 * size of the code generated here. 4990 */ 4991 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 4992 temp = tcg_temp_new_i64(); 4993 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg)); 4994 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 4995 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 4996 } else { 4997 TCGv_i32 t32 = tcg_temp_new_i32(); 4998 find_last_active(s, t32, esz, a->pg); 4999 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5000 } 5001 5002 /* Load the data. */ 5003 temp = tcg_temp_new_i64(); 5004 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5005 clean_addr = gen_mte_check1(s, temp, false, true, msz); 5006 5007 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), 5008 finalize_memop(s, dtype_mop[a->dtype])); 5009 5010 /* Broadcast to *all* elements. */ 5011 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5012 vsz, vsz, temp); 5013 5014 /* Zero the inactive elements. 
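If the branch above was taken because no predicate bits were set, the load and broadcast were skipped, so this zeroes the entire destination register.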
*/ 5015 gen_set_label(over); 5016 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5017 } 5018 5019 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5020 int msz, int esz, int nreg) 5021 { 5022 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5023 { { { gen_helper_sve_st1bb_r, 5024 gen_helper_sve_st1bh_r, 5025 gen_helper_sve_st1bs_r, 5026 gen_helper_sve_st1bd_r }, 5027 { NULL, 5028 gen_helper_sve_st1hh_le_r, 5029 gen_helper_sve_st1hs_le_r, 5030 gen_helper_sve_st1hd_le_r }, 5031 { NULL, NULL, 5032 gen_helper_sve_st1ss_le_r, 5033 gen_helper_sve_st1sd_le_r }, 5034 { NULL, NULL, NULL, 5035 gen_helper_sve_st1dd_le_r } }, 5036 { { gen_helper_sve_st1bb_r, 5037 gen_helper_sve_st1bh_r, 5038 gen_helper_sve_st1bs_r, 5039 gen_helper_sve_st1bd_r }, 5040 { NULL, 5041 gen_helper_sve_st1hh_be_r, 5042 gen_helper_sve_st1hs_be_r, 5043 gen_helper_sve_st1hd_be_r }, 5044 { NULL, NULL, 5045 gen_helper_sve_st1ss_be_r, 5046 gen_helper_sve_st1sd_be_r }, 5047 { NULL, NULL, NULL, 5048 gen_helper_sve_st1dd_be_r } } }, 5049 5050 { { { gen_helper_sve_st1bb_r_mte, 5051 gen_helper_sve_st1bh_r_mte, 5052 gen_helper_sve_st1bs_r_mte, 5053 gen_helper_sve_st1bd_r_mte }, 5054 { NULL, 5055 gen_helper_sve_st1hh_le_r_mte, 5056 gen_helper_sve_st1hs_le_r_mte, 5057 gen_helper_sve_st1hd_le_r_mte }, 5058 { NULL, NULL, 5059 gen_helper_sve_st1ss_le_r_mte, 5060 gen_helper_sve_st1sd_le_r_mte }, 5061 { NULL, NULL, NULL, 5062 gen_helper_sve_st1dd_le_r_mte } }, 5063 { { gen_helper_sve_st1bb_r_mte, 5064 gen_helper_sve_st1bh_r_mte, 5065 gen_helper_sve_st1bs_r_mte, 5066 gen_helper_sve_st1bd_r_mte }, 5067 { NULL, 5068 gen_helper_sve_st1hh_be_r_mte, 5069 gen_helper_sve_st1hs_be_r_mte, 5070 gen_helper_sve_st1hd_be_r_mte }, 5071 { NULL, NULL, 5072 gen_helper_sve_st1ss_be_r_mte, 5073 gen_helper_sve_st1sd_be_r_mte }, 5074 { NULL, NULL, NULL, 5075 gen_helper_sve_st1dd_be_r_mte } } }, 5076 }; 5077 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5078 { { { gen_helper_sve_st2bb_r, 5079 gen_helper_sve_st2hh_le_r, 5080 gen_helper_sve_st2ss_le_r, 5081 gen_helper_sve_st2dd_le_r }, 5082 { gen_helper_sve_st3bb_r, 5083 gen_helper_sve_st3hh_le_r, 5084 gen_helper_sve_st3ss_le_r, 5085 gen_helper_sve_st3dd_le_r }, 5086 { gen_helper_sve_st4bb_r, 5087 gen_helper_sve_st4hh_le_r, 5088 gen_helper_sve_st4ss_le_r, 5089 gen_helper_sve_st4dd_le_r } }, 5090 { { gen_helper_sve_st2bb_r, 5091 gen_helper_sve_st2hh_be_r, 5092 gen_helper_sve_st2ss_be_r, 5093 gen_helper_sve_st2dd_be_r }, 5094 { gen_helper_sve_st3bb_r, 5095 gen_helper_sve_st3hh_be_r, 5096 gen_helper_sve_st3ss_be_r, 5097 gen_helper_sve_st3dd_be_r }, 5098 { gen_helper_sve_st4bb_r, 5099 gen_helper_sve_st4hh_be_r, 5100 gen_helper_sve_st4ss_be_r, 5101 gen_helper_sve_st4dd_be_r } } }, 5102 { { { gen_helper_sve_st2bb_r_mte, 5103 gen_helper_sve_st2hh_le_r_mte, 5104 gen_helper_sve_st2ss_le_r_mte, 5105 gen_helper_sve_st2dd_le_r_mte }, 5106 { gen_helper_sve_st3bb_r_mte, 5107 gen_helper_sve_st3hh_le_r_mte, 5108 gen_helper_sve_st3ss_le_r_mte, 5109 gen_helper_sve_st3dd_le_r_mte }, 5110 { gen_helper_sve_st4bb_r_mte, 5111 gen_helper_sve_st4hh_le_r_mte, 5112 gen_helper_sve_st4ss_le_r_mte, 5113 gen_helper_sve_st4dd_le_r_mte } }, 5114 { { gen_helper_sve_st2bb_r_mte, 5115 gen_helper_sve_st2hh_be_r_mte, 5116 gen_helper_sve_st2ss_be_r_mte, 5117 gen_helper_sve_st2dd_be_r_mte }, 5118 { gen_helper_sve_st3bb_r_mte, 5119 gen_helper_sve_st3hh_be_r_mte, 5120 gen_helper_sve_st3ss_be_r_mte, 5121 gen_helper_sve_st3dd_be_r_mte }, 5122 { gen_helper_sve_st4bb_r_mte, 5123 
gen_helper_sve_st4hh_be_r_mte, 5124 gen_helper_sve_st4ss_be_r_mte, 5125 gen_helper_sve_st4dd_be_r_mte } } }, 5126 }; 5127 gen_helper_gvec_mem *fn; 5128 int be = s->be_data == MO_BE; 5129 5130 if (nreg == 0) { 5131 /* ST1 */ 5132 fn = fn_single[s->mte_active[0]][be][msz][esz]; 5133 nreg = 1; 5134 } else { 5135 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 5136 assert(msz == esz); 5137 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 5138 } 5139 assert(fn != NULL); 5140 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); 5141 } 5142 5143 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 5144 { 5145 if (!dc_isar_feature(aa64_sve, s)) { 5146 return false; 5147 } 5148 if (a->rm == 31 || a->msz > a->esz) { 5149 return false; 5150 } 5151 if (sve_access_check(s)) { 5152 TCGv_i64 addr = tcg_temp_new_i64(); 5153 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 5154 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5155 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5156 } 5157 return true; 5158 } 5159 5160 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 5161 { 5162 if (!dc_isar_feature(aa64_sve, s)) { 5163 return false; 5164 } 5165 if (a->msz > a->esz) { 5166 return false; 5167 } 5168 if (sve_access_check(s)) { 5169 int vsz = vec_full_reg_size(s); 5170 int elements = vsz >> a->esz; 5171 TCGv_i64 addr = tcg_temp_new_i64(); 5172 5173 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5174 (a->imm * elements * (a->nreg + 1)) << a->msz); 5175 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 5176 } 5177 return true; 5178 } 5179 5180 /* 5181 *** SVE gather loads / scatter stores 5182 */ 5183 5184 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 5185 int scale, TCGv_i64 scalar, int msz, bool is_write, 5186 gen_helper_gvec_mem_scatter *fn) 5187 { 5188 unsigned vsz = vec_full_reg_size(s); 5189 TCGv_ptr t_zm = tcg_temp_new_ptr(); 5190 TCGv_ptr t_pg = tcg_temp_new_ptr(); 5191 TCGv_ptr t_zt = tcg_temp_new_ptr(); 5192 int desc = 0; 5193 5194 if (s->mte_active[0]) { 5195 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 5196 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 5197 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 5198 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 5199 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); 5200 desc <<= SVE_MTEDESC_SHIFT; 5201 } 5202 desc = simd_desc(vsz, vsz, desc | scale); 5203 5204 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 5205 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); 5206 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); 5207 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); 5208 } 5209 5210 /* Indexed by [mte][be][ff][xs][u][msz]. 
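mte: MTE checks active; be: big-endian data; ff: first-fault variant; xs: unsigned (0) vs signed (1) extension of the 32-bit vector offsets; u: sign-extending (0) vs zero-extending (1) load; msz: log2 of the memory access size.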
*/ 5211 static gen_helper_gvec_mem_scatter * const 5212 gather_load_fn32[2][2][2][2][2][3] = { 5213 { /* MTE Inactive */ 5214 { /* Little-endian */ 5215 { { { gen_helper_sve_ldbss_zsu, 5216 gen_helper_sve_ldhss_le_zsu, 5217 NULL, }, 5218 { gen_helper_sve_ldbsu_zsu, 5219 gen_helper_sve_ldhsu_le_zsu, 5220 gen_helper_sve_ldss_le_zsu, } }, 5221 { { gen_helper_sve_ldbss_zss, 5222 gen_helper_sve_ldhss_le_zss, 5223 NULL, }, 5224 { gen_helper_sve_ldbsu_zss, 5225 gen_helper_sve_ldhsu_le_zss, 5226 gen_helper_sve_ldss_le_zss, } } }, 5227 5228 /* First-fault */ 5229 { { { gen_helper_sve_ldffbss_zsu, 5230 gen_helper_sve_ldffhss_le_zsu, 5231 NULL, }, 5232 { gen_helper_sve_ldffbsu_zsu, 5233 gen_helper_sve_ldffhsu_le_zsu, 5234 gen_helper_sve_ldffss_le_zsu, } }, 5235 { { gen_helper_sve_ldffbss_zss, 5236 gen_helper_sve_ldffhss_le_zss, 5237 NULL, }, 5238 { gen_helper_sve_ldffbsu_zss, 5239 gen_helper_sve_ldffhsu_le_zss, 5240 gen_helper_sve_ldffss_le_zss, } } } }, 5241 5242 { /* Big-endian */ 5243 { { { gen_helper_sve_ldbss_zsu, 5244 gen_helper_sve_ldhss_be_zsu, 5245 NULL, }, 5246 { gen_helper_sve_ldbsu_zsu, 5247 gen_helper_sve_ldhsu_be_zsu, 5248 gen_helper_sve_ldss_be_zsu, } }, 5249 { { gen_helper_sve_ldbss_zss, 5250 gen_helper_sve_ldhss_be_zss, 5251 NULL, }, 5252 { gen_helper_sve_ldbsu_zss, 5253 gen_helper_sve_ldhsu_be_zss, 5254 gen_helper_sve_ldss_be_zss, } } }, 5255 5256 /* First-fault */ 5257 { { { gen_helper_sve_ldffbss_zsu, 5258 gen_helper_sve_ldffhss_be_zsu, 5259 NULL, }, 5260 { gen_helper_sve_ldffbsu_zsu, 5261 gen_helper_sve_ldffhsu_be_zsu, 5262 gen_helper_sve_ldffss_be_zsu, } }, 5263 { { gen_helper_sve_ldffbss_zss, 5264 gen_helper_sve_ldffhss_be_zss, 5265 NULL, }, 5266 { gen_helper_sve_ldffbsu_zss, 5267 gen_helper_sve_ldffhsu_be_zss, 5268 gen_helper_sve_ldffss_be_zss, } } } } }, 5269 { /* MTE Active */ 5270 { /* Little-endian */ 5271 { { { gen_helper_sve_ldbss_zsu_mte, 5272 gen_helper_sve_ldhss_le_zsu_mte, 5273 NULL, }, 5274 { gen_helper_sve_ldbsu_zsu_mte, 5275 gen_helper_sve_ldhsu_le_zsu_mte, 5276 gen_helper_sve_ldss_le_zsu_mte, } }, 5277 { { gen_helper_sve_ldbss_zss_mte, 5278 gen_helper_sve_ldhss_le_zss_mte, 5279 NULL, }, 5280 { gen_helper_sve_ldbsu_zss_mte, 5281 gen_helper_sve_ldhsu_le_zss_mte, 5282 gen_helper_sve_ldss_le_zss_mte, } } }, 5283 5284 /* First-fault */ 5285 { { { gen_helper_sve_ldffbss_zsu_mte, 5286 gen_helper_sve_ldffhss_le_zsu_mte, 5287 NULL, }, 5288 { gen_helper_sve_ldffbsu_zsu_mte, 5289 gen_helper_sve_ldffhsu_le_zsu_mte, 5290 gen_helper_sve_ldffss_le_zsu_mte, } }, 5291 { { gen_helper_sve_ldffbss_zss_mte, 5292 gen_helper_sve_ldffhss_le_zss_mte, 5293 NULL, }, 5294 { gen_helper_sve_ldffbsu_zss_mte, 5295 gen_helper_sve_ldffhsu_le_zss_mte, 5296 gen_helper_sve_ldffss_le_zss_mte, } } } }, 5297 5298 { /* Big-endian */ 5299 { { { gen_helper_sve_ldbss_zsu_mte, 5300 gen_helper_sve_ldhss_be_zsu_mte, 5301 NULL, }, 5302 { gen_helper_sve_ldbsu_zsu_mte, 5303 gen_helper_sve_ldhsu_be_zsu_mte, 5304 gen_helper_sve_ldss_be_zsu_mte, } }, 5305 { { gen_helper_sve_ldbss_zss_mte, 5306 gen_helper_sve_ldhss_be_zss_mte, 5307 NULL, }, 5308 { gen_helper_sve_ldbsu_zss_mte, 5309 gen_helper_sve_ldhsu_be_zss_mte, 5310 gen_helper_sve_ldss_be_zss_mte, } } }, 5311 5312 /* First-fault */ 5313 { { { gen_helper_sve_ldffbss_zsu_mte, 5314 gen_helper_sve_ldffhss_be_zsu_mte, 5315 NULL, }, 5316 { gen_helper_sve_ldffbsu_zsu_mte, 5317 gen_helper_sve_ldffhsu_be_zsu_mte, 5318 gen_helper_sve_ldffss_be_zsu_mte, } }, 5319 { { gen_helper_sve_ldffbss_zss_mte, 5320 gen_helper_sve_ldffhss_be_zss_mte, 5321 NULL, }, 5322 { 
gen_helper_sve_ldffbsu_zss_mte, 5323 gen_helper_sve_ldffhsu_be_zss_mte, 5324 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 5325 }; 5326 5327 /* Note that we overload xs=2 to indicate 64-bit offset. */ 5328 static gen_helper_gvec_mem_scatter * const 5329 gather_load_fn64[2][2][2][3][2][4] = { 5330 { /* MTE Inactive */ 5331 { /* Little-endian */ 5332 { { { gen_helper_sve_ldbds_zsu, 5333 gen_helper_sve_ldhds_le_zsu, 5334 gen_helper_sve_ldsds_le_zsu, 5335 NULL, }, 5336 { gen_helper_sve_ldbdu_zsu, 5337 gen_helper_sve_ldhdu_le_zsu, 5338 gen_helper_sve_ldsdu_le_zsu, 5339 gen_helper_sve_lddd_le_zsu, } }, 5340 { { gen_helper_sve_ldbds_zss, 5341 gen_helper_sve_ldhds_le_zss, 5342 gen_helper_sve_ldsds_le_zss, 5343 NULL, }, 5344 { gen_helper_sve_ldbdu_zss, 5345 gen_helper_sve_ldhdu_le_zss, 5346 gen_helper_sve_ldsdu_le_zss, 5347 gen_helper_sve_lddd_le_zss, } }, 5348 { { gen_helper_sve_ldbds_zd, 5349 gen_helper_sve_ldhds_le_zd, 5350 gen_helper_sve_ldsds_le_zd, 5351 NULL, }, 5352 { gen_helper_sve_ldbdu_zd, 5353 gen_helper_sve_ldhdu_le_zd, 5354 gen_helper_sve_ldsdu_le_zd, 5355 gen_helper_sve_lddd_le_zd, } } }, 5356 5357 /* First-fault */ 5358 { { { gen_helper_sve_ldffbds_zsu, 5359 gen_helper_sve_ldffhds_le_zsu, 5360 gen_helper_sve_ldffsds_le_zsu, 5361 NULL, }, 5362 { gen_helper_sve_ldffbdu_zsu, 5363 gen_helper_sve_ldffhdu_le_zsu, 5364 gen_helper_sve_ldffsdu_le_zsu, 5365 gen_helper_sve_ldffdd_le_zsu, } }, 5366 { { gen_helper_sve_ldffbds_zss, 5367 gen_helper_sve_ldffhds_le_zss, 5368 gen_helper_sve_ldffsds_le_zss, 5369 NULL, }, 5370 { gen_helper_sve_ldffbdu_zss, 5371 gen_helper_sve_ldffhdu_le_zss, 5372 gen_helper_sve_ldffsdu_le_zss, 5373 gen_helper_sve_ldffdd_le_zss, } }, 5374 { { gen_helper_sve_ldffbds_zd, 5375 gen_helper_sve_ldffhds_le_zd, 5376 gen_helper_sve_ldffsds_le_zd, 5377 NULL, }, 5378 { gen_helper_sve_ldffbdu_zd, 5379 gen_helper_sve_ldffhdu_le_zd, 5380 gen_helper_sve_ldffsdu_le_zd, 5381 gen_helper_sve_ldffdd_le_zd, } } } }, 5382 { /* Big-endian */ 5383 { { { gen_helper_sve_ldbds_zsu, 5384 gen_helper_sve_ldhds_be_zsu, 5385 gen_helper_sve_ldsds_be_zsu, 5386 NULL, }, 5387 { gen_helper_sve_ldbdu_zsu, 5388 gen_helper_sve_ldhdu_be_zsu, 5389 gen_helper_sve_ldsdu_be_zsu, 5390 gen_helper_sve_lddd_be_zsu, } }, 5391 { { gen_helper_sve_ldbds_zss, 5392 gen_helper_sve_ldhds_be_zss, 5393 gen_helper_sve_ldsds_be_zss, 5394 NULL, }, 5395 { gen_helper_sve_ldbdu_zss, 5396 gen_helper_sve_ldhdu_be_zss, 5397 gen_helper_sve_ldsdu_be_zss, 5398 gen_helper_sve_lddd_be_zss, } }, 5399 { { gen_helper_sve_ldbds_zd, 5400 gen_helper_sve_ldhds_be_zd, 5401 gen_helper_sve_ldsds_be_zd, 5402 NULL, }, 5403 { gen_helper_sve_ldbdu_zd, 5404 gen_helper_sve_ldhdu_be_zd, 5405 gen_helper_sve_ldsdu_be_zd, 5406 gen_helper_sve_lddd_be_zd, } } }, 5407 5408 /* First-fault */ 5409 { { { gen_helper_sve_ldffbds_zsu, 5410 gen_helper_sve_ldffhds_be_zsu, 5411 gen_helper_sve_ldffsds_be_zsu, 5412 NULL, }, 5413 { gen_helper_sve_ldffbdu_zsu, 5414 gen_helper_sve_ldffhdu_be_zsu, 5415 gen_helper_sve_ldffsdu_be_zsu, 5416 gen_helper_sve_ldffdd_be_zsu, } }, 5417 { { gen_helper_sve_ldffbds_zss, 5418 gen_helper_sve_ldffhds_be_zss, 5419 gen_helper_sve_ldffsds_be_zss, 5420 NULL, }, 5421 { gen_helper_sve_ldffbdu_zss, 5422 gen_helper_sve_ldffhdu_be_zss, 5423 gen_helper_sve_ldffsdu_be_zss, 5424 gen_helper_sve_ldffdd_be_zss, } }, 5425 { { gen_helper_sve_ldffbds_zd, 5426 gen_helper_sve_ldffhds_be_zd, 5427 gen_helper_sve_ldffsds_be_zd, 5428 NULL, }, 5429 { gen_helper_sve_ldffbdu_zd, 5430 gen_helper_sve_ldffhdu_be_zd, 5431 gen_helper_sve_ldffsdu_be_zd, 5432 
gen_helper_sve_ldffdd_be_zd, } } } } }, 5433 { /* MTE Active */ 5434 { /* Little-endian */ 5435 { { { gen_helper_sve_ldbds_zsu_mte, 5436 gen_helper_sve_ldhds_le_zsu_mte, 5437 gen_helper_sve_ldsds_le_zsu_mte, 5438 NULL, }, 5439 { gen_helper_sve_ldbdu_zsu_mte, 5440 gen_helper_sve_ldhdu_le_zsu_mte, 5441 gen_helper_sve_ldsdu_le_zsu_mte, 5442 gen_helper_sve_lddd_le_zsu_mte, } }, 5443 { { gen_helper_sve_ldbds_zss_mte, 5444 gen_helper_sve_ldhds_le_zss_mte, 5445 gen_helper_sve_ldsds_le_zss_mte, 5446 NULL, }, 5447 { gen_helper_sve_ldbdu_zss_mte, 5448 gen_helper_sve_ldhdu_le_zss_mte, 5449 gen_helper_sve_ldsdu_le_zss_mte, 5450 gen_helper_sve_lddd_le_zss_mte, } }, 5451 { { gen_helper_sve_ldbds_zd_mte, 5452 gen_helper_sve_ldhds_le_zd_mte, 5453 gen_helper_sve_ldsds_le_zd_mte, 5454 NULL, }, 5455 { gen_helper_sve_ldbdu_zd_mte, 5456 gen_helper_sve_ldhdu_le_zd_mte, 5457 gen_helper_sve_ldsdu_le_zd_mte, 5458 gen_helper_sve_lddd_le_zd_mte, } } }, 5459 5460 /* First-fault */ 5461 { { { gen_helper_sve_ldffbds_zsu_mte, 5462 gen_helper_sve_ldffhds_le_zsu_mte, 5463 gen_helper_sve_ldffsds_le_zsu_mte, 5464 NULL, }, 5465 { gen_helper_sve_ldffbdu_zsu_mte, 5466 gen_helper_sve_ldffhdu_le_zsu_mte, 5467 gen_helper_sve_ldffsdu_le_zsu_mte, 5468 gen_helper_sve_ldffdd_le_zsu_mte, } }, 5469 { { gen_helper_sve_ldffbds_zss_mte, 5470 gen_helper_sve_ldffhds_le_zss_mte, 5471 gen_helper_sve_ldffsds_le_zss_mte, 5472 NULL, }, 5473 { gen_helper_sve_ldffbdu_zss_mte, 5474 gen_helper_sve_ldffhdu_le_zss_mte, 5475 gen_helper_sve_ldffsdu_le_zss_mte, 5476 gen_helper_sve_ldffdd_le_zss_mte, } }, 5477 { { gen_helper_sve_ldffbds_zd_mte, 5478 gen_helper_sve_ldffhds_le_zd_mte, 5479 gen_helper_sve_ldffsds_le_zd_mte, 5480 NULL, }, 5481 { gen_helper_sve_ldffbdu_zd_mte, 5482 gen_helper_sve_ldffhdu_le_zd_mte, 5483 gen_helper_sve_ldffsdu_le_zd_mte, 5484 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 5485 { /* Big-endian */ 5486 { { { gen_helper_sve_ldbds_zsu_mte, 5487 gen_helper_sve_ldhds_be_zsu_mte, 5488 gen_helper_sve_ldsds_be_zsu_mte, 5489 NULL, }, 5490 { gen_helper_sve_ldbdu_zsu_mte, 5491 gen_helper_sve_ldhdu_be_zsu_mte, 5492 gen_helper_sve_ldsdu_be_zsu_mte, 5493 gen_helper_sve_lddd_be_zsu_mte, } }, 5494 { { gen_helper_sve_ldbds_zss_mte, 5495 gen_helper_sve_ldhds_be_zss_mte, 5496 gen_helper_sve_ldsds_be_zss_mte, 5497 NULL, }, 5498 { gen_helper_sve_ldbdu_zss_mte, 5499 gen_helper_sve_ldhdu_be_zss_mte, 5500 gen_helper_sve_ldsdu_be_zss_mte, 5501 gen_helper_sve_lddd_be_zss_mte, } }, 5502 { { gen_helper_sve_ldbds_zd_mte, 5503 gen_helper_sve_ldhds_be_zd_mte, 5504 gen_helper_sve_ldsds_be_zd_mte, 5505 NULL, }, 5506 { gen_helper_sve_ldbdu_zd_mte, 5507 gen_helper_sve_ldhdu_be_zd_mte, 5508 gen_helper_sve_ldsdu_be_zd_mte, 5509 gen_helper_sve_lddd_be_zd_mte, } } }, 5510 5511 /* First-fault */ 5512 { { { gen_helper_sve_ldffbds_zsu_mte, 5513 gen_helper_sve_ldffhds_be_zsu_mte, 5514 gen_helper_sve_ldffsds_be_zsu_mte, 5515 NULL, }, 5516 { gen_helper_sve_ldffbdu_zsu_mte, 5517 gen_helper_sve_ldffhdu_be_zsu_mte, 5518 gen_helper_sve_ldffsdu_be_zsu_mte, 5519 gen_helper_sve_ldffdd_be_zsu_mte, } }, 5520 { { gen_helper_sve_ldffbds_zss_mte, 5521 gen_helper_sve_ldffhds_be_zss_mte, 5522 gen_helper_sve_ldffsds_be_zss_mte, 5523 NULL, }, 5524 { gen_helper_sve_ldffbdu_zss_mte, 5525 gen_helper_sve_ldffhdu_be_zss_mte, 5526 gen_helper_sve_ldffsdu_be_zss_mte, 5527 gen_helper_sve_ldffdd_be_zss_mte, } }, 5528 { { gen_helper_sve_ldffbds_zd_mte, 5529 gen_helper_sve_ldffhds_be_zd_mte, 5530 gen_helper_sve_ldffsds_be_zd_mte, 5531 NULL, }, 5532 { gen_helper_sve_ldffbdu_zd_mte, 5533 
gen_helper_sve_ldffhdu_be_zd_mte, 5534 gen_helper_sve_ldffsdu_be_zd_mte, 5535 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 5536 }; 5537 5538 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 5539 { 5540 gen_helper_gvec_mem_scatter *fn = NULL; 5541 bool be = s->be_data == MO_BE; 5542 bool mte = s->mte_active[0]; 5543 5544 if (!dc_isar_feature(aa64_sve, s)) { 5545 return false; 5546 } 5547 s->is_nonstreaming = true; 5548 if (!sve_access_check(s)) { 5549 return true; 5550 } 5551 5552 switch (a->esz) { 5553 case MO_32: 5554 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 5555 break; 5556 case MO_64: 5557 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 5558 break; 5559 } 5560 assert(fn != NULL); 5561 5562 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 5563 cpu_reg_sp(s, a->rn), a->msz, false, fn); 5564 return true; 5565 } 5566 5567 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 5568 { 5569 gen_helper_gvec_mem_scatter *fn = NULL; 5570 bool be = s->be_data == MO_BE; 5571 bool mte = s->mte_active[0]; 5572 5573 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 5574 return false; 5575 } 5576 if (!dc_isar_feature(aa64_sve, s)) { 5577 return false; 5578 } 5579 s->is_nonstreaming = true; 5580 if (!sve_access_check(s)) { 5581 return true; 5582 } 5583 5584 switch (a->esz) { 5585 case MO_32: 5586 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 5587 break; 5588 case MO_64: 5589 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 5590 break; 5591 } 5592 assert(fn != NULL); 5593 5594 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 5595 * by loading the immediate into the scalar parameter. 5596 */ 5597 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5598 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); 5599 return true; 5600 } 5601 5602 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 5603 { 5604 gen_helper_gvec_mem_scatter *fn = NULL; 5605 bool be = s->be_data == MO_BE; 5606 bool mte = s->mte_active[0]; 5607 5608 if (a->esz < a->msz + !a->u) { 5609 return false; 5610 } 5611 if (!dc_isar_feature(aa64_sve2, s)) { 5612 return false; 5613 } 5614 s->is_nonstreaming = true; 5615 if (!sve_access_check(s)) { 5616 return true; 5617 } 5618 5619 switch (a->esz) { 5620 case MO_32: 5621 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; 5622 break; 5623 case MO_64: 5624 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; 5625 break; 5626 } 5627 assert(fn != NULL); 5628 5629 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, 5630 cpu_reg(s, a->rm), a->msz, false, fn); 5631 return true; 5632 } 5633 5634 /* Indexed by [mte][be][xs][msz]. 
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset. */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU. */
    s->is_nonstreaming = true;
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
5872 */ 5873 5874 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) 5875 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) 5876 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) 5877 5878 /* 5879 * SVE2 Integer Multiply - Unpredicated 5880 */ 5881 5882 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) 5883 5884 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { 5885 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 5886 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 5887 }; 5888 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5889 smulh_zzz_fns[a->esz], a, 0) 5890 5891 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { 5892 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 5893 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 5894 }; 5895 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5896 umulh_zzz_fns[a->esz], a, 0) 5897 5898 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5899 gen_helper_gvec_pmul_b, a, 0) 5900 5901 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { 5902 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 5903 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 5904 }; 5905 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5906 sqdmulh_zzz_fns[a->esz], a, 0) 5907 5908 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { 5909 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 5910 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 5911 }; 5912 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 5913 sqrdmulh_zzz_fns[a->esz], a, 0) 5914 5915 /* 5916 * SVE2 Integer - Predicated 5917 */ 5918 5919 static gen_helper_gvec_4 * const sadlp_fns[4] = { 5920 NULL, gen_helper_sve2_sadalp_zpzz_h, 5921 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, 5922 }; 5923 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5924 sadlp_fns[a->esz], a, 0) 5925 5926 static gen_helper_gvec_4 * const uadlp_fns[4] = { 5927 NULL, gen_helper_sve2_uadalp_zpzz_h, 5928 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, 5929 }; 5930 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, 5931 uadlp_fns[a->esz], a, 0) 5932 5933 /* 5934 * SVE2 integer unary operations (predicated) 5935 */ 5936 5937 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, 5938 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) 5939 5940 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, 5941 a->esz == 2 ? 
gen_helper_sve2_ursqrte_s : NULL, a, 0) 5942 5943 static gen_helper_gvec_3 * const sqabs_fns[4] = { 5944 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 5945 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 5946 }; 5947 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) 5948 5949 static gen_helper_gvec_3 * const sqneg_fns[4] = { 5950 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 5951 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 5952 }; 5953 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) 5954 5955 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) 5956 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) 5957 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) 5958 5959 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) 5960 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) 5961 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) 5962 5963 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) 5964 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) 5965 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) 5966 5967 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) 5968 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) 5969 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) 5970 5971 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) 5972 DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp) 5973 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) 5974 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) 5975 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) 5976 5977 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) 5978 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) 5979 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) 5980 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) 5981 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) 5982 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) 5983 5984 /* 5985 * SVE2 Widening Integer Arithmetic 5986 */ 5987 5988 static gen_helper_gvec_3 * const saddl_fns[4] = { 5989 NULL, gen_helper_sve2_saddl_h, 5990 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d, 5991 }; 5992 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 5993 saddl_fns[a->esz], a, 0) 5994 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 5995 saddl_fns[a->esz], a, 3) 5996 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 5997 saddl_fns[a->esz], a, 2) 5998 5999 static gen_helper_gvec_3 * const ssubl_fns[4] = { 6000 NULL, gen_helper_sve2_ssubl_h, 6001 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d, 6002 }; 6003 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6004 ssubl_fns[a->esz], a, 0) 6005 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6006 ssubl_fns[a->esz], a, 3) 6007 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz, 6008 ssubl_fns[a->esz], a, 2) 6009 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz, 6010 ssubl_fns[a->esz], a, 1) 6011 6012 static gen_helper_gvec_3 * const sabdl_fns[4] = { 6013 NULL, gen_helper_sve2_sabdl_h, 6014 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d, 6015 }; 6016 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6017 sabdl_fns[a->esz], a, 0) 6018 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6019 sabdl_fns[a->esz], a, 3) 6020 6021 static gen_helper_gvec_3 * const uaddl_fns[4] = { 6022 NULL, gen_helper_sve2_uaddl_h, 6023 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d, 6024 }; 6025 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6026 uaddl_fns[a->esz], a, 0) 6027 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6028 uaddl_fns[a->esz], a, 3) 6029 6030 static gen_helper_gvec_3 * const usubl_fns[4] = { 6031 NULL, gen_helper_sve2_usubl_h, 6032 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d, 6033 }; 6034 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6035 usubl_fns[a->esz], a, 0) 6036 TRANS_FEAT(USUBLT, aa64_sve2, 
gen_gvec_ool_arg_zzz, 6037 usubl_fns[a->esz], a, 3) 6038 6039 static gen_helper_gvec_3 * const uabdl_fns[4] = { 6040 NULL, gen_helper_sve2_uabdl_h, 6041 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d, 6042 }; 6043 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz, 6044 uabdl_fns[a->esz], a, 0) 6045 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz, 6046 uabdl_fns[a->esz], a, 3) 6047 6048 static gen_helper_gvec_3 * const sqdmull_fns[4] = { 6049 NULL, gen_helper_sve2_sqdmull_zzz_h, 6050 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d, 6051 }; 6052 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6053 sqdmull_fns[a->esz], a, 0) 6054 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6055 sqdmull_fns[a->esz], a, 3) 6056 6057 static gen_helper_gvec_3 * const smull_fns[4] = { 6058 NULL, gen_helper_sve2_smull_zzz_h, 6059 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d, 6060 }; 6061 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6062 smull_fns[a->esz], a, 0) 6063 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6064 smull_fns[a->esz], a, 3) 6065 6066 static gen_helper_gvec_3 * const umull_fns[4] = { 6067 NULL, gen_helper_sve2_umull_zzz_h, 6068 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d, 6069 }; 6070 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6071 umull_fns[a->esz], a, 0) 6072 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, 6073 umull_fns[a->esz], a, 3) 6074 6075 static gen_helper_gvec_3 * const eoril_fns[4] = { 6076 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6077 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 6078 }; 6079 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2) 6080 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1) 6081 6082 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6083 { 6084 static gen_helper_gvec_3 * const fns[4] = { 6085 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6086 NULL, gen_helper_sve2_pmull_d, 6087 }; 6088 6089 if (a->esz == 0) { 6090 if (!dc_isar_feature(aa64_sve2_pmull128, s)) { 6091 return false; 6092 } 6093 s->is_nonstreaming = true; 6094 } else if (!dc_isar_feature(aa64_sve, s)) { 6095 return false; 6096 } 6097 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel); 6098 } 6099 6100 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false) 6101 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true) 6102 6103 static gen_helper_gvec_3 * const saddw_fns[4] = { 6104 NULL, gen_helper_sve2_saddw_h, 6105 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d, 6106 }; 6107 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0) 6108 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1) 6109 6110 static gen_helper_gvec_3 * const ssubw_fns[4] = { 6111 NULL, gen_helper_sve2_ssubw_h, 6112 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d, 6113 }; 6114 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0) 6115 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1) 6116 6117 static gen_helper_gvec_3 * const uaddw_fns[4] = { 6118 NULL, gen_helper_sve2_uaddw_h, 6119 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d, 6120 }; 6121 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0) 6122 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1) 6123 6124 static gen_helper_gvec_3 * const usubw_fns[4] = { 6125 NULL, gen_helper_sve2_usubw_h, 6126 gen_helper_sve2_usubw_s, 
gen_helper_sve2_usubw_d, 6127 }; 6128 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0) 6129 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1) 6130 6131 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6132 { 6133 int top = imm & 1; 6134 int shl = imm >> 1; 6135 int halfbits = 4 << vece; 6136 6137 if (top) { 6138 if (shl == halfbits) { 6139 TCGv_vec t = tcg_temp_new_vec_matching(d); 6140 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6141 tcg_gen_and_vec(vece, d, n, t); 6142 } else { 6143 tcg_gen_sari_vec(vece, d, n, halfbits); 6144 tcg_gen_shli_vec(vece, d, d, shl); 6145 } 6146 } else { 6147 tcg_gen_shli_vec(vece, d, n, halfbits); 6148 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 6149 } 6150 } 6151 6152 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 6153 { 6154 int halfbits = 4 << vece; 6155 int top = imm & 1; 6156 int shl = (imm >> 1); 6157 int shift; 6158 uint64_t mask; 6159 6160 mask = MAKE_64BIT_MASK(0, halfbits); 6161 mask <<= shl; 6162 mask = dup_const(vece, mask); 6163 6164 shift = shl - top * halfbits; 6165 if (shift < 0) { 6166 tcg_gen_shri_i64(d, n, -shift); 6167 } else { 6168 tcg_gen_shli_i64(d, n, shift); 6169 } 6170 tcg_gen_andi_i64(d, d, mask); 6171 } 6172 6173 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6174 { 6175 gen_ushll_i64(MO_16, d, n, imm); 6176 } 6177 6178 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6179 { 6180 gen_ushll_i64(MO_32, d, n, imm); 6181 } 6182 6183 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 6184 { 6185 gen_ushll_i64(MO_64, d, n, imm); 6186 } 6187 6188 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 6189 { 6190 int halfbits = 4 << vece; 6191 int top = imm & 1; 6192 int shl = imm >> 1; 6193 6194 if (top) { 6195 if (shl == halfbits) { 6196 TCGv_vec t = tcg_temp_new_vec_matching(d); 6197 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 6198 tcg_gen_and_vec(vece, d, n, t); 6199 } else { 6200 tcg_gen_shri_vec(vece, d, n, halfbits); 6201 tcg_gen_shli_vec(vece, d, d, shl); 6202 } 6203 } else { 6204 if (shl == 0) { 6205 TCGv_vec t = tcg_temp_new_vec_matching(d); 6206 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6207 tcg_gen_and_vec(vece, d, n, t); 6208 } else { 6209 tcg_gen_shli_vec(vece, d, n, halfbits); 6210 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 6211 } 6212 } 6213 } 6214 6215 static bool do_shll_tb(DisasContext *s, arg_rri_esz *a, 6216 const GVecGen2i ops[3], bool sel) 6217 { 6218 6219 if (a->esz < 0 || a->esz > 2) { 6220 return false; 6221 } 6222 if (sve_access_check(s)) { 6223 unsigned vsz = vec_full_reg_size(s); 6224 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6225 vec_full_reg_offset(s, a->rn), 6226 vsz, vsz, (a->imm << 1) | sel, 6227 &ops[a->esz]); 6228 } 6229 return true; 6230 } 6231 6232 static const TCGOpcode sshll_list[] = { 6233 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 6234 }; 6235 static const GVecGen2i sshll_ops[3] = { 6236 { .fniv = gen_sshll_vec, 6237 .opt_opc = sshll_list, 6238 .fno = gen_helper_sve2_sshll_h, 6239 .vece = MO_16 }, 6240 { .fniv = gen_sshll_vec, 6241 .opt_opc = sshll_list, 6242 .fno = gen_helper_sve2_sshll_s, 6243 .vece = MO_32 }, 6244 { .fniv = gen_sshll_vec, 6245 .opt_opc = sshll_list, 6246 .fno = gen_helper_sve2_sshll_d, 6247 .vece = MO_64 } 6248 }; 6249 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false) 6250 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, 
sshll_ops, true) 6251 6252 static const TCGOpcode ushll_list[] = { 6253 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 6254 }; 6255 static const GVecGen2i ushll_ops[3] = { 6256 { .fni8 = gen_ushll16_i64, 6257 .fniv = gen_ushll_vec, 6258 .opt_opc = ushll_list, 6259 .fno = gen_helper_sve2_ushll_h, 6260 .vece = MO_16 }, 6261 { .fni8 = gen_ushll32_i64, 6262 .fniv = gen_ushll_vec, 6263 .opt_opc = ushll_list, 6264 .fno = gen_helper_sve2_ushll_s, 6265 .vece = MO_32 }, 6266 { .fni8 = gen_ushll64_i64, 6267 .fniv = gen_ushll_vec, 6268 .opt_opc = ushll_list, 6269 .fno = gen_helper_sve2_ushll_d, 6270 .vece = MO_64 }, 6271 }; 6272 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false) 6273 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true) 6274 6275 static gen_helper_gvec_3 * const bext_fns[4] = { 6276 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 6277 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 6278 }; 6279 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6280 bext_fns[a->esz], a, 0) 6281 6282 static gen_helper_gvec_3 * const bdep_fns[4] = { 6283 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 6284 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 6285 }; 6286 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6287 bdep_fns[a->esz], a, 0) 6288 6289 static gen_helper_gvec_3 * const bgrp_fns[4] = { 6290 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 6291 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 6292 }; 6293 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz, 6294 bgrp_fns[a->esz], a, 0) 6295 6296 static gen_helper_gvec_3 * const cadd_fns[4] = { 6297 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 6298 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d, 6299 }; 6300 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6301 cadd_fns[a->esz], a, 0) 6302 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6303 cadd_fns[a->esz], a, 1) 6304 6305 static gen_helper_gvec_3 * const sqcadd_fns[4] = { 6306 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 6307 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d, 6308 }; 6309 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz, 6310 sqcadd_fns[a->esz], a, 0) 6311 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz, 6312 sqcadd_fns[a->esz], a, 1) 6313 6314 static gen_helper_gvec_4 * const sabal_fns[4] = { 6315 NULL, gen_helper_sve2_sabal_h, 6316 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d, 6317 }; 6318 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0) 6319 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1) 6320 6321 static gen_helper_gvec_4 * const uabal_fns[4] = { 6322 NULL, gen_helper_sve2_uabal_h, 6323 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d, 6324 }; 6325 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0) 6326 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1) 6327 6328 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 6329 { 6330 static gen_helper_gvec_4 * const fns[2] = { 6331 gen_helper_sve2_adcl_s, 6332 gen_helper_sve2_adcl_d, 6333 }; 6334 /* 6335 * Note that in this case the ESZ field encodes both size and sign. 6336 * Split out 'subtract' into bit 1 of the data field for the helper. 
6337 */ 6338 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel); 6339 } 6340 6341 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false) 6342 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true) 6343 6344 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a) 6345 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a) 6346 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a) 6347 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a) 6348 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a) 6349 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a) 6350 6351 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a) 6352 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a) 6353 6354 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a, 6355 const GVecGen2 ops[3]) 6356 { 6357 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) { 6358 return false; 6359 } 6360 if (sve_access_check(s)) { 6361 unsigned vsz = vec_full_reg_size(s); 6362 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 6363 vec_full_reg_offset(s, a->rn), 6364 vsz, vsz, &ops[a->esz]); 6365 } 6366 return true; 6367 } 6368 6369 static const TCGOpcode sqxtn_list[] = { 6370 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 6371 }; 6372 6373 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6374 { 6375 TCGv_vec t = tcg_temp_new_vec_matching(d); 6376 int halfbits = 4 << vece; 6377 int64_t mask = (1ull << halfbits) - 1; 6378 int64_t min = -1ull << (halfbits - 1); 6379 int64_t max = -min - 1; 6380 6381 tcg_gen_dupi_vec(vece, t, min); 6382 tcg_gen_smax_vec(vece, d, n, t); 6383 tcg_gen_dupi_vec(vece, t, max); 6384 tcg_gen_smin_vec(vece, d, d, t); 6385 tcg_gen_dupi_vec(vece, t, mask); 6386 tcg_gen_and_vec(vece, d, d, t); 6387 } 6388 6389 static const GVecGen2 sqxtnb_ops[3] = { 6390 { .fniv = gen_sqxtnb_vec, 6391 .opt_opc = sqxtn_list, 6392 .fno = gen_helper_sve2_sqxtnb_h, 6393 .vece = MO_16 }, 6394 { .fniv = gen_sqxtnb_vec, 6395 .opt_opc = sqxtn_list, 6396 .fno = gen_helper_sve2_sqxtnb_s, 6397 .vece = MO_32 }, 6398 { .fniv = gen_sqxtnb_vec, 6399 .opt_opc = sqxtn_list, 6400 .fno = gen_helper_sve2_sqxtnb_d, 6401 .vece = MO_64 }, 6402 }; 6403 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops) 6404 6405 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6406 { 6407 TCGv_vec t = tcg_temp_new_vec_matching(d); 6408 int halfbits = 4 << vece; 6409 int64_t mask = (1ull << halfbits) - 1; 6410 int64_t min = -1ull << (halfbits - 1); 6411 int64_t max = -min - 1; 6412 6413 tcg_gen_dupi_vec(vece, t, min); 6414 tcg_gen_smax_vec(vece, n, n, t); 6415 tcg_gen_dupi_vec(vece, t, max); 6416 tcg_gen_smin_vec(vece, n, n, t); 6417 tcg_gen_shli_vec(vece, n, n, halfbits); 6418 tcg_gen_dupi_vec(vece, t, mask); 6419 tcg_gen_bitsel_vec(vece, d, t, d, n); 6420 } 6421 6422 static const GVecGen2 sqxtnt_ops[3] = { 6423 { .fniv = gen_sqxtnt_vec, 6424 .opt_opc = sqxtn_list, 6425 .load_dest = true, 6426 .fno = gen_helper_sve2_sqxtnt_h, 6427 .vece = MO_16 }, 6428 { .fniv = gen_sqxtnt_vec, 6429 .opt_opc = sqxtn_list, 6430 .load_dest = true, 6431 .fno = gen_helper_sve2_sqxtnt_s, 6432 .vece = MO_32 }, 6433 { .fniv = gen_sqxtnt_vec, 6434 .opt_opc = sqxtn_list, 6435 .load_dest = true, 6436 .fno = gen_helper_sve2_sqxtnt_d, 6437 .vece = MO_64 }, 6438 }; 6439 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops) 6440 6441 static const TCGOpcode uqxtn_list[] = { 6442 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 6443 }; 6444 
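/*
 * A worked illustration of the unsigned saturating-narrow expansions that
 * follow (the numbers are invented for this note): with vece == MO_16 the
 * narrow element is 8 bits, so the bound is MAKE_64BIT_MASK(0, 8) == 0xff and
 *     UMIN(0x1234, 0xff) -> 0xff   (saturates)
 *     UMIN(0x007f, 0xff) -> 0x7f   (passes through)
 * The bottom ("B") forms leave the narrow result in the even elements and
 * need no shift; the top ("T") forms shift left by halfbits and merge the
 * result into the odd elements of the destination with a bitsel.
 */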
6445 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6446 { 6447 TCGv_vec t = tcg_temp_new_vec_matching(d); 6448 int halfbits = 4 << vece; 6449 int64_t max = (1ull << halfbits) - 1; 6450 6451 tcg_gen_dupi_vec(vece, t, max); 6452 tcg_gen_umin_vec(vece, d, n, t); 6453 } 6454 6455 static const GVecGen2 uqxtnb_ops[3] = { 6456 { .fniv = gen_uqxtnb_vec, 6457 .opt_opc = uqxtn_list, 6458 .fno = gen_helper_sve2_uqxtnb_h, 6459 .vece = MO_16 }, 6460 { .fniv = gen_uqxtnb_vec, 6461 .opt_opc = uqxtn_list, 6462 .fno = gen_helper_sve2_uqxtnb_s, 6463 .vece = MO_32 }, 6464 { .fniv = gen_uqxtnb_vec, 6465 .opt_opc = uqxtn_list, 6466 .fno = gen_helper_sve2_uqxtnb_d, 6467 .vece = MO_64 }, 6468 }; 6469 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops) 6470 6471 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6472 { 6473 TCGv_vec t = tcg_temp_new_vec_matching(d); 6474 int halfbits = 4 << vece; 6475 int64_t max = (1ull << halfbits) - 1; 6476 6477 tcg_gen_dupi_vec(vece, t, max); 6478 tcg_gen_umin_vec(vece, n, n, t); 6479 tcg_gen_shli_vec(vece, n, n, halfbits); 6480 tcg_gen_bitsel_vec(vece, d, t, d, n); 6481 } 6482 6483 static const GVecGen2 uqxtnt_ops[3] = { 6484 { .fniv = gen_uqxtnt_vec, 6485 .opt_opc = uqxtn_list, 6486 .load_dest = true, 6487 .fno = gen_helper_sve2_uqxtnt_h, 6488 .vece = MO_16 }, 6489 { .fniv = gen_uqxtnt_vec, 6490 .opt_opc = uqxtn_list, 6491 .load_dest = true, 6492 .fno = gen_helper_sve2_uqxtnt_s, 6493 .vece = MO_32 }, 6494 { .fniv = gen_uqxtnt_vec, 6495 .opt_opc = uqxtn_list, 6496 .load_dest = true, 6497 .fno = gen_helper_sve2_uqxtnt_d, 6498 .vece = MO_64 }, 6499 }; 6500 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops) 6501 6502 static const TCGOpcode sqxtun_list[] = { 6503 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 6504 }; 6505 6506 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6507 { 6508 TCGv_vec t = tcg_temp_new_vec_matching(d); 6509 int halfbits = 4 << vece; 6510 int64_t max = (1ull << halfbits) - 1; 6511 6512 tcg_gen_dupi_vec(vece, t, 0); 6513 tcg_gen_smax_vec(vece, d, n, t); 6514 tcg_gen_dupi_vec(vece, t, max); 6515 tcg_gen_umin_vec(vece, d, d, t); 6516 } 6517 6518 static const GVecGen2 sqxtunb_ops[3] = { 6519 { .fniv = gen_sqxtunb_vec, 6520 .opt_opc = sqxtun_list, 6521 .fno = gen_helper_sve2_sqxtunb_h, 6522 .vece = MO_16 }, 6523 { .fniv = gen_sqxtunb_vec, 6524 .opt_opc = sqxtun_list, 6525 .fno = gen_helper_sve2_sqxtunb_s, 6526 .vece = MO_32 }, 6527 { .fniv = gen_sqxtunb_vec, 6528 .opt_opc = sqxtun_list, 6529 .fno = gen_helper_sve2_sqxtunb_d, 6530 .vece = MO_64 }, 6531 }; 6532 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops) 6533 6534 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 6535 { 6536 TCGv_vec t = tcg_temp_new_vec_matching(d); 6537 int halfbits = 4 << vece; 6538 int64_t max = (1ull << halfbits) - 1; 6539 6540 tcg_gen_dupi_vec(vece, t, 0); 6541 tcg_gen_smax_vec(vece, n, n, t); 6542 tcg_gen_dupi_vec(vece, t, max); 6543 tcg_gen_umin_vec(vece, n, n, t); 6544 tcg_gen_shli_vec(vece, n, n, halfbits); 6545 tcg_gen_bitsel_vec(vece, d, t, d, n); 6546 } 6547 6548 static const GVecGen2 sqxtunt_ops[3] = { 6549 { .fniv = gen_sqxtunt_vec, 6550 .opt_opc = sqxtun_list, 6551 .load_dest = true, 6552 .fno = gen_helper_sve2_sqxtunt_h, 6553 .vece = MO_16 }, 6554 { .fniv = gen_sqxtunt_vec, 6555 .opt_opc = sqxtun_list, 6556 .load_dest = true, 6557 .fno = gen_helper_sve2_sqxtunt_s, 6558 .vece = MO_32 }, 6559 { .fniv = gen_sqxtunt_vec, 6560 .opt_opc = sqxtun_list, 6561 
.load_dest = true, 6562 .fno = gen_helper_sve2_sqxtunt_d, 6563 .vece = MO_64 }, 6564 }; 6565 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops) 6566 6567 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a, 6568 const GVecGen2i ops[3]) 6569 { 6570 if (a->esz < 0 || a->esz > MO_32) { 6571 return false; 6572 } 6573 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 6574 if (sve_access_check(s)) { 6575 unsigned vsz = vec_full_reg_size(s); 6576 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 6577 vec_full_reg_offset(s, a->rn), 6578 vsz, vsz, a->imm, &ops[a->esz]); 6579 } 6580 return true; 6581 } 6582 6583 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6584 { 6585 int halfbits = 4 << vece; 6586 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6587 6588 tcg_gen_shri_i64(d, n, shr); 6589 tcg_gen_andi_i64(d, d, mask); 6590 } 6591 6592 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6593 { 6594 gen_shrnb_i64(MO_16, d, n, shr); 6595 } 6596 6597 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6598 { 6599 gen_shrnb_i64(MO_32, d, n, shr); 6600 } 6601 6602 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6603 { 6604 gen_shrnb_i64(MO_64, d, n, shr); 6605 } 6606 6607 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6608 { 6609 TCGv_vec t = tcg_temp_new_vec_matching(d); 6610 int halfbits = 4 << vece; 6611 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6612 6613 tcg_gen_shri_vec(vece, n, n, shr); 6614 tcg_gen_dupi_vec(vece, t, mask); 6615 tcg_gen_and_vec(vece, d, n, t); 6616 } 6617 6618 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 }; 6619 static const GVecGen2i shrnb_ops[3] = { 6620 { .fni8 = gen_shrnb16_i64, 6621 .fniv = gen_shrnb_vec, 6622 .opt_opc = shrnb_vec_list, 6623 .fno = gen_helper_sve2_shrnb_h, 6624 .vece = MO_16 }, 6625 { .fni8 = gen_shrnb32_i64, 6626 .fniv = gen_shrnb_vec, 6627 .opt_opc = shrnb_vec_list, 6628 .fno = gen_helper_sve2_shrnb_s, 6629 .vece = MO_32 }, 6630 { .fni8 = gen_shrnb64_i64, 6631 .fniv = gen_shrnb_vec, 6632 .opt_opc = shrnb_vec_list, 6633 .fno = gen_helper_sve2_shrnb_d, 6634 .vece = MO_64 }, 6635 }; 6636 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops) 6637 6638 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 6639 { 6640 int halfbits = 4 << vece; 6641 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 6642 6643 tcg_gen_shli_i64(n, n, halfbits - shr); 6644 tcg_gen_andi_i64(n, n, ~mask); 6645 tcg_gen_andi_i64(d, d, mask); 6646 tcg_gen_or_i64(d, d, n); 6647 } 6648 6649 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6650 { 6651 gen_shrnt_i64(MO_16, d, n, shr); 6652 } 6653 6654 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6655 { 6656 gen_shrnt_i64(MO_32, d, n, shr); 6657 } 6658 6659 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 6660 { 6661 tcg_gen_shri_i64(n, n, shr); 6662 tcg_gen_deposit_i64(d, d, n, 32, 32); 6663 } 6664 6665 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 6666 { 6667 TCGv_vec t = tcg_temp_new_vec_matching(d); 6668 int halfbits = 4 << vece; 6669 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 6670 6671 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 6672 tcg_gen_dupi_vec(vece, t, mask); 6673 tcg_gen_bitsel_vec(vece, d, t, d, n); 6674 } 6675 6676 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 }; 6677 static const GVecGen2i shrnt_ops[3] = { 6678 { .fni8 = 
gen_shrnt16_i64, 6679 .fniv = gen_shrnt_vec, 6680 .opt_opc = shrnt_vec_list, 6681 .load_dest = true, 6682 .fno = gen_helper_sve2_shrnt_h, 6683 .vece = MO_16 }, 6684 { .fni8 = gen_shrnt32_i64, 6685 .fniv = gen_shrnt_vec, 6686 .opt_opc = shrnt_vec_list, 6687 .load_dest = true, 6688 .fno = gen_helper_sve2_shrnt_s, 6689 .vece = MO_32 }, 6690 { .fni8 = gen_shrnt64_i64, 6691 .fniv = gen_shrnt_vec, 6692 .opt_opc = shrnt_vec_list, 6693 .load_dest = true, 6694 .fno = gen_helper_sve2_shrnt_d, 6695 .vece = MO_64 }, 6696 }; 6697 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops) 6698 6699 static const GVecGen2i rshrnb_ops[3] = { 6700 { .fno = gen_helper_sve2_rshrnb_h }, 6701 { .fno = gen_helper_sve2_rshrnb_s }, 6702 { .fno = gen_helper_sve2_rshrnb_d }, 6703 }; 6704 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops) 6705 6706 static const GVecGen2i rshrnt_ops[3] = { 6707 { .fno = gen_helper_sve2_rshrnt_h }, 6708 { .fno = gen_helper_sve2_rshrnt_s }, 6709 { .fno = gen_helper_sve2_rshrnt_d }, 6710 }; 6711 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops) 6712 6713 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 6714 TCGv_vec n, int64_t shr) 6715 { 6716 TCGv_vec t = tcg_temp_new_vec_matching(d); 6717 int halfbits = 4 << vece; 6718 6719 tcg_gen_sari_vec(vece, n, n, shr); 6720 tcg_gen_dupi_vec(vece, t, 0); 6721 tcg_gen_smax_vec(vece, n, n, t); 6722 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6723 tcg_gen_umin_vec(vece, d, n, t); 6724 } 6725 6726 static const TCGOpcode sqshrunb_vec_list[] = { 6727 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6728 }; 6729 static const GVecGen2i sqshrunb_ops[3] = { 6730 { .fniv = gen_sqshrunb_vec, 6731 .opt_opc = sqshrunb_vec_list, 6732 .fno = gen_helper_sve2_sqshrunb_h, 6733 .vece = MO_16 }, 6734 { .fniv = gen_sqshrunb_vec, 6735 .opt_opc = sqshrunb_vec_list, 6736 .fno = gen_helper_sve2_sqshrunb_s, 6737 .vece = MO_32 }, 6738 { .fniv = gen_sqshrunb_vec, 6739 .opt_opc = sqshrunb_vec_list, 6740 .fno = gen_helper_sve2_sqshrunb_d, 6741 .vece = MO_64 }, 6742 }; 6743 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops) 6744 6745 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 6746 TCGv_vec n, int64_t shr) 6747 { 6748 TCGv_vec t = tcg_temp_new_vec_matching(d); 6749 int halfbits = 4 << vece; 6750 6751 tcg_gen_sari_vec(vece, n, n, shr); 6752 tcg_gen_dupi_vec(vece, t, 0); 6753 tcg_gen_smax_vec(vece, n, n, t); 6754 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6755 tcg_gen_umin_vec(vece, n, n, t); 6756 tcg_gen_shli_vec(vece, n, n, halfbits); 6757 tcg_gen_bitsel_vec(vece, d, t, d, n); 6758 } 6759 6760 static const TCGOpcode sqshrunt_vec_list[] = { 6761 INDEX_op_shli_vec, INDEX_op_sari_vec, 6762 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 6763 }; 6764 static const GVecGen2i sqshrunt_ops[3] = { 6765 { .fniv = gen_sqshrunt_vec, 6766 .opt_opc = sqshrunt_vec_list, 6767 .load_dest = true, 6768 .fno = gen_helper_sve2_sqshrunt_h, 6769 .vece = MO_16 }, 6770 { .fniv = gen_sqshrunt_vec, 6771 .opt_opc = sqshrunt_vec_list, 6772 .load_dest = true, 6773 .fno = gen_helper_sve2_sqshrunt_s, 6774 .vece = MO_32 }, 6775 { .fniv = gen_sqshrunt_vec, 6776 .opt_opc = sqshrunt_vec_list, 6777 .load_dest = true, 6778 .fno = gen_helper_sve2_sqshrunt_d, 6779 .vece = MO_64 }, 6780 }; 6781 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops) 6782 6783 static const GVecGen2i sqrshrunb_ops[3] = { 6784 { .fno = gen_helper_sve2_sqrshrunb_h }, 6785 { .fno = gen_helper_sve2_sqrshrunb_s }, 6786 { .fno = gen_helper_sve2_sqrshrunb_d 
}, 6787 }; 6788 TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops) 6789 6790 static const GVecGen2i sqrshrunt_ops[3] = { 6791 { .fno = gen_helper_sve2_sqrshrunt_h }, 6792 { .fno = gen_helper_sve2_sqrshrunt_s }, 6793 { .fno = gen_helper_sve2_sqrshrunt_d }, 6794 }; 6795 TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops) 6796 6797 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 6798 TCGv_vec n, int64_t shr) 6799 { 6800 TCGv_vec t = tcg_temp_new_vec_matching(d); 6801 int halfbits = 4 << vece; 6802 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6803 int64_t min = -max - 1; 6804 6805 tcg_gen_sari_vec(vece, n, n, shr); 6806 tcg_gen_dupi_vec(vece, t, min); 6807 tcg_gen_smax_vec(vece, n, n, t); 6808 tcg_gen_dupi_vec(vece, t, max); 6809 tcg_gen_smin_vec(vece, n, n, t); 6810 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6811 tcg_gen_and_vec(vece, d, n, t); 6812 } 6813 6814 static const TCGOpcode sqshrnb_vec_list[] = { 6815 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6816 }; 6817 static const GVecGen2i sqshrnb_ops[3] = { 6818 { .fniv = gen_sqshrnb_vec, 6819 .opt_opc = sqshrnb_vec_list, 6820 .fno = gen_helper_sve2_sqshrnb_h, 6821 .vece = MO_16 }, 6822 { .fniv = gen_sqshrnb_vec, 6823 .opt_opc = sqshrnb_vec_list, 6824 .fno = gen_helper_sve2_sqshrnb_s, 6825 .vece = MO_32 }, 6826 { .fniv = gen_sqshrnb_vec, 6827 .opt_opc = sqshrnb_vec_list, 6828 .fno = gen_helper_sve2_sqshrnb_d, 6829 .vece = MO_64 }, 6830 }; 6831 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops) 6832 6833 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 6834 TCGv_vec n, int64_t shr) 6835 { 6836 TCGv_vec t = tcg_temp_new_vec_matching(d); 6837 int halfbits = 4 << vece; 6838 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 6839 int64_t min = -max - 1; 6840 6841 tcg_gen_sari_vec(vece, n, n, shr); 6842 tcg_gen_dupi_vec(vece, t, min); 6843 tcg_gen_smax_vec(vece, n, n, t); 6844 tcg_gen_dupi_vec(vece, t, max); 6845 tcg_gen_smin_vec(vece, n, n, t); 6846 tcg_gen_shli_vec(vece, n, n, halfbits); 6847 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6848 tcg_gen_bitsel_vec(vece, d, t, d, n); 6849 } 6850 6851 static const TCGOpcode sqshrnt_vec_list[] = { 6852 INDEX_op_shli_vec, INDEX_op_sari_vec, 6853 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 6854 }; 6855 static const GVecGen2i sqshrnt_ops[3] = { 6856 { .fniv = gen_sqshrnt_vec, 6857 .opt_opc = sqshrnt_vec_list, 6858 .load_dest = true, 6859 .fno = gen_helper_sve2_sqshrnt_h, 6860 .vece = MO_16 }, 6861 { .fniv = gen_sqshrnt_vec, 6862 .opt_opc = sqshrnt_vec_list, 6863 .load_dest = true, 6864 .fno = gen_helper_sve2_sqshrnt_s, 6865 .vece = MO_32 }, 6866 { .fniv = gen_sqshrnt_vec, 6867 .opt_opc = sqshrnt_vec_list, 6868 .load_dest = true, 6869 .fno = gen_helper_sve2_sqshrnt_d, 6870 .vece = MO_64 }, 6871 }; 6872 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops) 6873 6874 static const GVecGen2i sqrshrnb_ops[3] = { 6875 { .fno = gen_helper_sve2_sqrshrnb_h }, 6876 { .fno = gen_helper_sve2_sqrshrnb_s }, 6877 { .fno = gen_helper_sve2_sqrshrnb_d }, 6878 }; 6879 TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops) 6880 6881 static const GVecGen2i sqrshrnt_ops[3] = { 6882 { .fno = gen_helper_sve2_sqrshrnt_h }, 6883 { .fno = gen_helper_sve2_sqrshrnt_s }, 6884 { .fno = gen_helper_sve2_sqrshrnt_d }, 6885 }; 6886 TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops) 6887 6888 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 6889 TCGv_vec n, int64_t shr) 6890 { 6891 TCGv_vec t = 
tcg_temp_new_vec_matching(d); 6892 int halfbits = 4 << vece; 6893 6894 tcg_gen_shri_vec(vece, n, n, shr); 6895 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6896 tcg_gen_umin_vec(vece, d, n, t); 6897 } 6898 6899 static const TCGOpcode uqshrnb_vec_list[] = { 6900 INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6901 }; 6902 static const GVecGen2i uqshrnb_ops[3] = { 6903 { .fniv = gen_uqshrnb_vec, 6904 .opt_opc = uqshrnb_vec_list, 6905 .fno = gen_helper_sve2_uqshrnb_h, 6906 .vece = MO_16 }, 6907 { .fniv = gen_uqshrnb_vec, 6908 .opt_opc = uqshrnb_vec_list, 6909 .fno = gen_helper_sve2_uqshrnb_s, 6910 .vece = MO_32 }, 6911 { .fniv = gen_uqshrnb_vec, 6912 .opt_opc = uqshrnb_vec_list, 6913 .fno = gen_helper_sve2_uqshrnb_d, 6914 .vece = MO_64 }, 6915 }; 6916 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops) 6917 6918 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d, 6919 TCGv_vec n, int64_t shr) 6920 { 6921 TCGv_vec t = tcg_temp_new_vec_matching(d); 6922 int halfbits = 4 << vece; 6923 6924 tcg_gen_shri_vec(vece, n, n, shr); 6925 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 6926 tcg_gen_umin_vec(vece, n, n, t); 6927 tcg_gen_shli_vec(vece, n, n, halfbits); 6928 tcg_gen_bitsel_vec(vece, d, t, d, n); 6929 } 6930 6931 static const TCGOpcode uqshrnt_vec_list[] = { 6932 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0 6933 }; 6934 static const GVecGen2i uqshrnt_ops[3] = { 6935 { .fniv = gen_uqshrnt_vec, 6936 .opt_opc = uqshrnt_vec_list, 6937 .load_dest = true, 6938 .fno = gen_helper_sve2_uqshrnt_h, 6939 .vece = MO_16 }, 6940 { .fniv = gen_uqshrnt_vec, 6941 .opt_opc = uqshrnt_vec_list, 6942 .load_dest = true, 6943 .fno = gen_helper_sve2_uqshrnt_s, 6944 .vece = MO_32 }, 6945 { .fniv = gen_uqshrnt_vec, 6946 .opt_opc = uqshrnt_vec_list, 6947 .load_dest = true, 6948 .fno = gen_helper_sve2_uqshrnt_d, 6949 .vece = MO_64 }, 6950 }; 6951 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops) 6952 6953 static const GVecGen2i uqrshrnb_ops[3] = { 6954 { .fno = gen_helper_sve2_uqrshrnb_h }, 6955 { .fno = gen_helper_sve2_uqrshrnb_s }, 6956 { .fno = gen_helper_sve2_uqrshrnb_d }, 6957 }; 6958 TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops) 6959 6960 static const GVecGen2i uqrshrnt_ops[3] = { 6961 { .fno = gen_helper_sve2_uqrshrnt_h }, 6962 { .fno = gen_helper_sve2_uqrshrnt_s }, 6963 { .fno = gen_helper_sve2_uqrshrnt_d }, 6964 }; 6965 TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops) 6966 6967 #define DO_SVE2_ZZZ_NARROW(NAME, name) \ 6968 static gen_helper_gvec_3 * const name##_fns[4] = { \ 6969 NULL, gen_helper_sve2_##name##_h, \ 6970 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 6971 }; \ 6972 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \ 6973 name##_fns[a->esz], a, 0) 6974 6975 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) 6976 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt) 6977 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb) 6978 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt) 6979 6980 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb) 6981 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt) 6982 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb) 6983 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt) 6984 6985 static gen_helper_gvec_flags_4 * const match_fns[4] = { 6986 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL 6987 }; 6988 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz]) 6989 6990 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = { 6991 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL 6992 }; 6993 TRANS_FEAT_NONSTREAMING(NMATCH, 
aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz]) 6994 6995 static gen_helper_gvec_4 * const histcnt_fns[4] = { 6996 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d 6997 }; 6998 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz, 6999 histcnt_fns[a->esz], a, 0) 7000 7001 TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz, 7002 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0) 7003 7004 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz) 7005 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz) 7006 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz) 7007 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz) 7008 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz) 7009 7010 /* 7011 * SVE Integer Multiply-Add (unpredicated) 7012 */ 7013 7014 TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, 7015 gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra, 7016 0, FPST_FPCR) 7017 TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, 7018 gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra, 7019 0, FPST_FPCR) 7020 7021 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = { 7022 NULL, gen_helper_sve2_sqdmlal_zzzw_h, 7023 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d, 7024 }; 7025 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7026 sqdmlal_zzzw_fns[a->esz], a, 0) 7027 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7028 sqdmlal_zzzw_fns[a->esz], a, 3) 7029 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7030 sqdmlal_zzzw_fns[a->esz], a, 2) 7031 7032 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = { 7033 NULL, gen_helper_sve2_sqdmlsl_zzzw_h, 7034 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d, 7035 }; 7036 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7037 sqdmlsl_zzzw_fns[a->esz], a, 0) 7038 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7039 sqdmlsl_zzzw_fns[a->esz], a, 3) 7040 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz, 7041 sqdmlsl_zzzw_fns[a->esz], a, 2) 7042 7043 static gen_helper_gvec_4 * const sqrdmlah_fns[] = { 7044 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h, 7045 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d, 7046 }; 7047 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7048 sqrdmlah_fns[a->esz], a, 0) 7049 7050 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = { 7051 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h, 7052 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d, 7053 }; 7054 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz, 7055 sqrdmlsh_fns[a->esz], a, 0) 7056 7057 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = { 7058 NULL, gen_helper_sve2_smlal_zzzw_h, 7059 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d, 7060 }; 7061 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7062 smlal_zzzw_fns[a->esz], a, 0) 7063 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7064 smlal_zzzw_fns[a->esz], a, 1) 7065 7066 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = { 7067 NULL, gen_helper_sve2_umlal_zzzw_h, 7068 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d, 7069 }; 7070 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7071 umlal_zzzw_fns[a->esz], a, 0) 7072 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7073 umlal_zzzw_fns[a->esz], a, 1) 7074 7075 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = { 7076 NULL, gen_helper_sve2_smlsl_zzzw_h, 7077 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d, 7078 }; 7079 
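/*
 * For the B/T pairs below, the trailing data argument passed to the
 * out-of-line helper is the bottom/top selector: 0 operates on the even
 * (bottom) source elements, 1 on the odd (top) source elements.
 */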
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7080 smlsl_zzzw_fns[a->esz], a, 0) 7081 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7082 smlsl_zzzw_fns[a->esz], a, 1) 7083 7084 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = { 7085 NULL, gen_helper_sve2_umlsl_zzzw_h, 7086 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, 7087 }; 7088 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7089 umlsl_zzzw_fns[a->esz], a, 0) 7090 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz, 7091 umlsl_zzzw_fns[a->esz], a, 1) 7092 7093 static gen_helper_gvec_4 * const cmla_fns[] = { 7094 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, 7095 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, 7096 }; 7097 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7098 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7099 7100 static gen_helper_gvec_4 * const cdot_fns[] = { 7101 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d 7102 }; 7103 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7104 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7105 7106 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = { 7107 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, 7108 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, 7109 }; 7110 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz, 7111 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot) 7112 7113 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, 7114 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0) 7115 7116 TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz, 7117 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt) 7118 7119 TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7120 gen_helper_crypto_aese, a, false) 7121 TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz, 7122 gen_helper_crypto_aese, a, true) 7123 7124 TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7125 gen_helper_crypto_sm4e, a, 0) 7126 TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz, 7127 gen_helper_crypto_sm4ekey, a, 0) 7128 7129 TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, 7130 gen_gvec_rax1, a) 7131 7132 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz, 7133 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR) 7134 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz, 7135 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR) 7136 7137 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, 7138 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR) 7139 7140 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz, 7141 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR) 7142 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz, 7143 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR) 7144 7145 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a, 7146 FPROUNDING_ODD, gen_helper_sve_fcvt_ds) 7147 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a, 7148 FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds) 7149 7150 static gen_helper_gvec_3_ptr * const flogb_fns[] = { 7151 NULL, gen_helper_flogb_h, 7152 gen_helper_flogb_s, gen_helper_flogb_d 7153 }; 7154 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz], 7155 a, 0, a->esz == MO_16 ? 
           FPST_FPCR_F16 : FPST_FPCR)

static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)

static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply to either copy the source, or write zeros. */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
    return true;
}

static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)

static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
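/*
 * A small worked example of the clamp expansions above (numbers chosen
 * purely for illustration): gen_sclamp computes d = SMIN(SMAX(a, n), m)
 * per element, so with byte lanes and bounds n = -16, m = 15:
 *     a = -100 -> SMAX(-100, -16) = -16, SMIN(-16, 15) = -16
 *     a =    7 -> SMAX(7, -16)    =   7, SMIN(7, 15)   =   7
 *     a =   99 -> SMAX(99, -16)   =  99, SMIN(99, 15)  =  15
 * gen_uclamp has the same shape with unsigned min/max.
 */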