1 /*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "translate.h"
22 #include "translate-a64.h"
23 #include "fpu/softfloat.h"
24
25
/*
 * Function-pointer signatures used by the expanders in this file.
 * NOTE(review): only the parameter types are visible here; the meaning
 * of each argument is defined by the helpers that match these
 * signatures -- confirm against their declarations.
 */

/* Vector expander taking three 32-bit values, a 64-bit TCG value and two sizes. */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Helpers that return a 32-bit value (presumably flags) from 3 or 4 pointers. */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Memory-access helpers; both take the env pointer first. */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
37
38 /*
39 * Helpers for extracting complex instruction fields.
40 */
41
42 /* See e.g. ASR (immediate, predicated).
43 * Returns -1 for unallocated encoding; diagnose later.
44 */
tszimm_esz(DisasContext * s,int x)45 static int tszimm_esz(DisasContext *s, int x)
46 {
47 x >>= 3; /* discard imm3 */
48 return 31 - clz32(x);
49 }
50
/*
 * Decode a right-shift amount from a tsz:imm3 field.
 *
 * If tszimm_esz() reports an unallocated encoding (-1), return that
 * value directly instead of using it as a shift count: shifting by a
 * negative amount is undefined behaviour.  The trans functions in this
 * file reject esz < 0 before consuming the immediate, so the value
 * returned in that case is never used.
 */
static int tszimm_shr(DisasContext *s, int x)
{
    int esz = tszimm_esz(s, x);

    if (esz < 0) {
        return esz;
    }
    return (16 << esz) - x;
}
55
56 /* See e.g. LSL (immediate, predicated). */
tszimm_shl(DisasContext * s,int x)57 static int tszimm_shl(DisasContext *s, int x)
58 {
59 return x - (8 << tszimm_esz(s, x));
60 }
61
62 /* The SH bit is in bit 8. Extract the low 8 and shift. */
expand_imm_sh8s(DisasContext * s,int x)63 static inline int expand_imm_sh8s(DisasContext *s, int x)
64 {
65 return (int8_t)x << (x & 0x100 ? 8 : 0);
66 }
67
/*
 * Unsigned counterpart of expand_imm_sh8s: zero-extend the low 8 bits
 * and shift them left by 8 when the SH bit (bit 8) is set.
 */
static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    uint8_t low = (uint8_t)x;
    int shift = 0;

    if (x & 0x100) {
        shift = 8;
    }
    return low << shift;
}
72
73 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
74 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
75 */
msz_dtype(DisasContext * s,int msz)76 static inline int msz_dtype(DisasContext *s, int msz)
77 {
78 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
79 return dtype[msz];
80 }
81
82 /*
83 * Include the generated decoder.
84 */
85
86 #include "decode-sve.c.inc"
87
88 /*
89 * Implement all of the translator functions referenced by the decoder.
90 */
91
92 /* Invoke an out-of-line helper on 2 Zregs. */
gen_gvec_ool_zz(DisasContext * s,gen_helper_gvec_2 * fn,int rd,int rn,int data)93 static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
94 int rd, int rn, int data)
95 {
96 if (fn == NULL) {
97 return false;
98 }
99 if (sve_access_check(s)) {
100 unsigned vsz = vec_full_reg_size(s);
101 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
102 vec_full_reg_offset(s, rn),
103 vsz, vsz, data, fn);
104 }
105 return true;
106 }
107
gen_gvec_fpst_zz(DisasContext * s,gen_helper_gvec_2_ptr * fn,int rd,int rn,int data,ARMFPStatusFlavour flavour)108 static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
109 int rd, int rn, int data,
110 ARMFPStatusFlavour flavour)
111 {
112 if (fn == NULL) {
113 return false;
114 }
115 if (sve_access_check(s)) {
116 unsigned vsz = vec_full_reg_size(s);
117 TCGv_ptr status = fpstatus_ptr(flavour);
118
119 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
120 vec_full_reg_offset(s, rn),
121 status, vsz, vsz, data, fn);
122 }
123 return true;
124 }
125
gen_gvec_fpst_arg_zz(DisasContext * s,gen_helper_gvec_2_ptr * fn,arg_rr_esz * a,int data)126 static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
127 arg_rr_esz *a, int data)
128 {
129 return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
130 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
131 }
132
133 /* Invoke an out-of-line helper on 3 Zregs. */
gen_gvec_ool_zzz(DisasContext * s,gen_helper_gvec_3 * fn,int rd,int rn,int rm,int data)134 static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
135 int rd, int rn, int rm, int data)
136 {
137 if (fn == NULL) {
138 return false;
139 }
140 if (sve_access_check(s)) {
141 unsigned vsz = vec_full_reg_size(s);
142 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
143 vec_full_reg_offset(s, rn),
144 vec_full_reg_offset(s, rm),
145 vsz, vsz, data, fn);
146 }
147 return true;
148 }
149
gen_gvec_ool_arg_zzz(DisasContext * s,gen_helper_gvec_3 * fn,arg_rrr_esz * a,int data)150 static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
151 arg_rrr_esz *a, int data)
152 {
153 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
154 }
155
156 /* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
gen_gvec_fpst_zzz(DisasContext * s,gen_helper_gvec_3_ptr * fn,int rd,int rn,int rm,int data,ARMFPStatusFlavour flavour)157 static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
158 int rd, int rn, int rm,
159 int data, ARMFPStatusFlavour flavour)
160 {
161 if (fn == NULL) {
162 return false;
163 }
164 if (sve_access_check(s)) {
165 unsigned vsz = vec_full_reg_size(s);
166 TCGv_ptr status = fpstatus_ptr(flavour);
167
168 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
169 vec_full_reg_offset(s, rn),
170 vec_full_reg_offset(s, rm),
171 status, vsz, vsz, data, fn);
172 }
173 return true;
174 }
175
gen_gvec_fpst_arg_zzz(DisasContext * s,gen_helper_gvec_3_ptr * fn,arg_rrr_esz * a,int data)176 static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
177 arg_rrr_esz *a, int data)
178 {
179 return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
180 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
181 }
182
183 /* Invoke an out-of-line helper on 4 Zregs. */
gen_gvec_ool_zzzz(DisasContext * s,gen_helper_gvec_4 * fn,int rd,int rn,int rm,int ra,int data)184 static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
185 int rd, int rn, int rm, int ra, int data)
186 {
187 if (fn == NULL) {
188 return false;
189 }
190 if (sve_access_check(s)) {
191 unsigned vsz = vec_full_reg_size(s);
192 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
193 vec_full_reg_offset(s, rn),
194 vec_full_reg_offset(s, rm),
195 vec_full_reg_offset(s, ra),
196 vsz, vsz, data, fn);
197 }
198 return true;
199 }
200
gen_gvec_ool_arg_zzzz(DisasContext * s,gen_helper_gvec_4 * fn,arg_rrrr_esz * a,int data)201 static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
202 arg_rrrr_esz *a, int data)
203 {
204 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
205 }
206
gen_gvec_ool_arg_zzxz(DisasContext * s,gen_helper_gvec_4 * fn,arg_rrxr_esz * a)207 static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
208 arg_rrxr_esz *a)
209 {
210 return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
211 }
212
213 /* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
gen_gvec_ptr_zzzz(DisasContext * s,gen_helper_gvec_4_ptr * fn,int rd,int rn,int rm,int ra,int data,TCGv_ptr ptr)214 static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
215 int rd, int rn, int rm, int ra,
216 int data, TCGv_ptr ptr)
217 {
218 if (fn == NULL) {
219 return false;
220 }
221 if (sve_access_check(s)) {
222 unsigned vsz = vec_full_reg_size(s);
223 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
224 vec_full_reg_offset(s, rn),
225 vec_full_reg_offset(s, rm),
226 vec_full_reg_offset(s, ra),
227 ptr, vsz, vsz, data, fn);
228 }
229 return true;
230 }
231
gen_gvec_fpst_zzzz(DisasContext * s,gen_helper_gvec_4_ptr * fn,int rd,int rn,int rm,int ra,int data,ARMFPStatusFlavour flavour)232 static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
233 int rd, int rn, int rm, int ra,
234 int data, ARMFPStatusFlavour flavour)
235 {
236 TCGv_ptr status = fpstatus_ptr(flavour);
237 bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
238 return ret;
239 }
240
241 /* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
gen_gvec_fpst_zzzzp(DisasContext * s,gen_helper_gvec_5_ptr * fn,int rd,int rn,int rm,int ra,int pg,int data,ARMFPStatusFlavour flavour)242 static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
243 int rd, int rn, int rm, int ra, int pg,
244 int data, ARMFPStatusFlavour flavour)
245 {
246 if (fn == NULL) {
247 return false;
248 }
249 if (sve_access_check(s)) {
250 unsigned vsz = vec_full_reg_size(s);
251 TCGv_ptr status = fpstatus_ptr(flavour);
252
253 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
254 vec_full_reg_offset(s, rn),
255 vec_full_reg_offset(s, rm),
256 vec_full_reg_offset(s, ra),
257 pred_full_reg_offset(s, pg),
258 status, vsz, vsz, data, fn);
259 }
260 return true;
261 }
262
263 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
gen_gvec_ool_zzp(DisasContext * s,gen_helper_gvec_3 * fn,int rd,int rn,int pg,int data)264 static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
265 int rd, int rn, int pg, int data)
266 {
267 if (fn == NULL) {
268 return false;
269 }
270 if (sve_access_check(s)) {
271 unsigned vsz = vec_full_reg_size(s);
272 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
273 vec_full_reg_offset(s, rn),
274 pred_full_reg_offset(s, pg),
275 vsz, vsz, data, fn);
276 }
277 return true;
278 }
279
gen_gvec_ool_arg_zpz(DisasContext * s,gen_helper_gvec_3 * fn,arg_rpr_esz * a,int data)280 static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
281 arg_rpr_esz *a, int data)
282 {
283 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
284 }
285
gen_gvec_ool_arg_zpzi(DisasContext * s,gen_helper_gvec_3 * fn,arg_rpri_esz * a)286 static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
287 arg_rpri_esz *a)
288 {
289 return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
290 }
291
gen_gvec_fpst_zzp(DisasContext * s,gen_helper_gvec_3_ptr * fn,int rd,int rn,int pg,int data,ARMFPStatusFlavour flavour)292 static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
293 int rd, int rn, int pg, int data,
294 ARMFPStatusFlavour flavour)
295 {
296 if (fn == NULL) {
297 return false;
298 }
299 if (sve_access_check(s)) {
300 unsigned vsz = vec_full_reg_size(s);
301 TCGv_ptr status = fpstatus_ptr(flavour);
302
303 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
304 vec_full_reg_offset(s, rn),
305 pred_full_reg_offset(s, pg),
306 status, vsz, vsz, data, fn);
307 }
308 return true;
309 }
310
gen_gvec_fpst_arg_zpz(DisasContext * s,gen_helper_gvec_3_ptr * fn,arg_rpr_esz * a,int data,ARMFPStatusFlavour flavour)311 static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
312 arg_rpr_esz *a, int data,
313 ARMFPStatusFlavour flavour)
314 {
315 return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
316 }
317
318 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
gen_gvec_ool_zzzp(DisasContext * s,gen_helper_gvec_4 * fn,int rd,int rn,int rm,int pg,int data)319 static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
320 int rd, int rn, int rm, int pg, int data)
321 {
322 if (fn == NULL) {
323 return false;
324 }
325 if (sve_access_check(s)) {
326 unsigned vsz = vec_full_reg_size(s);
327 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
328 vec_full_reg_offset(s, rn),
329 vec_full_reg_offset(s, rm),
330 pred_full_reg_offset(s, pg),
331 vsz, vsz, data, fn);
332 }
333 return true;
334 }
335
gen_gvec_ool_arg_zpzz(DisasContext * s,gen_helper_gvec_4 * fn,arg_rprr_esz * a,int data)336 static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
337 arg_rprr_esz *a, int data)
338 {
339 return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
340 }
341
342 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
gen_gvec_fpst_zzzp(DisasContext * s,gen_helper_gvec_4_ptr * fn,int rd,int rn,int rm,int pg,int data,ARMFPStatusFlavour flavour)343 static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
344 int rd, int rn, int rm, int pg, int data,
345 ARMFPStatusFlavour flavour)
346 {
347 if (fn == NULL) {
348 return false;
349 }
350 if (sve_access_check(s)) {
351 unsigned vsz = vec_full_reg_size(s);
352 TCGv_ptr status = fpstatus_ptr(flavour);
353
354 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
355 vec_full_reg_offset(s, rn),
356 vec_full_reg_offset(s, rm),
357 pred_full_reg_offset(s, pg),
358 status, vsz, vsz, data, fn);
359 }
360 return true;
361 }
362
gen_gvec_fpst_arg_zpzz(DisasContext * s,gen_helper_gvec_4_ptr * fn,arg_rprr_esz * a)363 static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
364 arg_rprr_esz *a)
365 {
366 return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
367 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
368 }
369
370 /* Invoke a vector expander on two Zregs and an immediate. */
gen_gvec_fn_zzi(DisasContext * s,GVecGen2iFn * gvec_fn,int esz,int rd,int rn,uint64_t imm)371 static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
372 int esz, int rd, int rn, uint64_t imm)
373 {
374 if (gvec_fn == NULL) {
375 return false;
376 }
377 if (sve_access_check(s)) {
378 unsigned vsz = vec_full_reg_size(s);
379 gvec_fn(esz, vec_full_reg_offset(s, rd),
380 vec_full_reg_offset(s, rn), imm, vsz, vsz);
381 }
382 return true;
383 }
384
gen_gvec_fn_arg_zzi(DisasContext * s,GVecGen2iFn * gvec_fn,arg_rri_esz * a)385 static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
386 arg_rri_esz *a)
387 {
388 if (a->esz < 0) {
389 /* Invalid tsz encoding -- see tszimm_esz. */
390 return false;
391 }
392 return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
393 }
394
395 /* Invoke a vector expander on three Zregs. */
gen_gvec_fn_zzz(DisasContext * s,GVecGen3Fn * gvec_fn,int esz,int rd,int rn,int rm)396 static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
397 int esz, int rd, int rn, int rm)
398 {
399 if (gvec_fn == NULL) {
400 return false;
401 }
402 if (sve_access_check(s)) {
403 unsigned vsz = vec_full_reg_size(s);
404 gvec_fn(esz, vec_full_reg_offset(s, rd),
405 vec_full_reg_offset(s, rn),
406 vec_full_reg_offset(s, rm), vsz, vsz);
407 }
408 return true;
409 }
410
gen_gvec_fn_arg_zzz(DisasContext * s,GVecGen3Fn * fn,arg_rrr_esz * a)411 static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
412 arg_rrr_esz *a)
413 {
414 return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
415 }
416
417 /* Invoke a vector expander on four Zregs. */
gen_gvec_fn_arg_zzzz(DisasContext * s,GVecGen4Fn * gvec_fn,arg_rrrr_esz * a)418 static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
419 arg_rrrr_esz *a)
420 {
421 if (gvec_fn == NULL) {
422 return false;
423 }
424 if (sve_access_check(s)) {
425 unsigned vsz = vec_full_reg_size(s);
426 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
427 vec_full_reg_offset(s, a->rn),
428 vec_full_reg_offset(s, a->rm),
429 vec_full_reg_offset(s, a->ra), vsz, vsz);
430 }
431 return true;
432 }
433
434 /* Invoke a vector move on two Zregs. */
do_mov_z(DisasContext * s,int rd,int rn)435 static bool do_mov_z(DisasContext *s, int rd, int rn)
436 {
437 if (sve_access_check(s)) {
438 unsigned vsz = vec_full_reg_size(s);
439 tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
440 vec_full_reg_offset(s, rn), vsz, vsz);
441 }
442 return true;
443 }
444
445 /* Initialize a Zreg with replications of a 64-bit immediate. */
do_dupi_z(DisasContext * s,int rd,uint64_t word)446 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
447 {
448 unsigned vsz = vec_full_reg_size(s);
449 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
450 }
451
452 /* Invoke a vector expander on three Pregs. */
gen_gvec_fn_ppp(DisasContext * s,GVecGen3Fn * gvec_fn,int rd,int rn,int rm)453 static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
454 int rd, int rn, int rm)
455 {
456 if (sve_access_check(s)) {
457 unsigned psz = pred_gvec_reg_size(s);
458 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
459 pred_full_reg_offset(s, rn),
460 pred_full_reg_offset(s, rm), psz, psz);
461 }
462 return true;
463 }
464
465 /* Invoke a vector move on two Pregs. */
do_mov_p(DisasContext * s,int rd,int rn)466 static bool do_mov_p(DisasContext *s, int rd, int rn)
467 {
468 if (sve_access_check(s)) {
469 unsigned psz = pred_gvec_reg_size(s);
470 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
471 pred_full_reg_offset(s, rn), psz, psz);
472 }
473 return true;
474 }
475
/*
 * Set the cpu flags as per a return from an SVE helper.
 *
 * From the 32-bit helper result @t: cpu_NF receives the whole value,
 * cpu_ZF receives bit 1, cpu_CF receives bit 0 and cpu_VF is cleared.
 * NOTE(review): this depends on the translator's internal flag
 * representation and on the helper's return format, neither of which
 * is visible here -- confirm before changing either.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
484
485 /* Subroutines computing the ARM PredTest psuedofunction. */
do_predtest1(TCGv_i64 d,TCGv_i64 g)486 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
487 {
488 TCGv_i32 t = tcg_temp_new_i32();
489
490 gen_helper_sve_predtest1(t, d, g);
491 do_pred_flags(t);
492 }
493
/*
 * Multi-word PredTest: passes pointers to the data at env offsets
 * @dofs and @gofs, together with the constant @words, to the
 * sve_predtest helper, then copies its result into the cpu flags.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr d_ptr = tcg_temp_new_ptr();
    TCGv_ptr g_ptr = tcg_temp_new_ptr();
    TCGv_i32 flags = tcg_temp_new_i32();
    TCGv_i32 nwords;

    tcg_gen_addi_ptr(d_ptr, tcg_env, dofs);
    tcg_gen_addi_ptr(g_ptr, tcg_env, gofs);

    nwords = tcg_constant_i32(words);
    gen_helper_sve_predtest(flags, d_ptr, g_ptr, nwords);

    do_pred_flags(flags);
}
507
/*
 * For each element size, the bits within a 64-bit predicate word that
 * are active.  The table is indexed by the log2 element size in bytes:
 * entry N has one bit set every (1 << N) bit positions.
 */
const uint64_t pred_esz_masks[5] = {
    [0] = 0xffffffffffffffffull,
    [1] = 0x5555555555555555ull,
    [2] = 0x1111111111111111ull,
    [3] = 0x0101010101010101ull,
    [4] = 0x0001000100010001ull,
};
514
/*
 * Translator for the decoder's INVALID pattern: reports the encoding
 * as unallocated and returns true so that the instruction is treated
 * as handled.  The decoded argument @a is unused.
 */
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}
520
521 /*
522 *** SVE Logical - Unpredicated Group
523 */
524
/*
 * Unpredicated bitwise logic, expanded inline with the generic gvec
 * operations.
 * NOTE(review): TRANS_FEAT is defined outside this chunk; each line
 * presumably defines trans_<NAME>, gated on the named ISA feature, and
 * forwards the remaining arguments to the given function -- confirm.
 */
TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
529
530 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
531 {
532 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
533 return false;
534 }
535 if (sve_access_check(s)) {
536 unsigned vsz = vec_full_reg_size(s);
537 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
538 vec_full_reg_offset(s, a->rn),
539 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
540 }
541 return true;
542 }
543
/*
 * Four-operand logic operations gated on aa64_sve2; both go through
 * gen_gvec_fn_arg_zzzz with an expander defined outside this chunk.
 */
TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a)
TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a)
546
/*
 * Expander for BSL with the GVecGen4Fn argument order (d, n, m, a).
 * It forwards to the generic bitsel expander with the fourth operand
 * @a moved to the front of the source list.
 */
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

/* Hook BSL into the decoder, gated on aa64_sve2. */
TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
555
/*
 * 64-bit expansion of BSL1N: d = (~n & k) | (m & ~k).
 * Both @n and @m are overwritten with intermediate values.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);      /* n = k & ~n */
    tcg_gen_andc_i64(m, m, k);      /* m = m & ~k */
    tcg_gen_or_i64(d, n, m);
}
562
/*
 * Host-vector expansion of BSL1N.  The fallback branch computes
 * d = (~n & k) | (m & ~k) explicitly; when the host has a bitsel
 * instruction, @n is inverted first and bitsel selects between the
 * inverted @n and @m under control of @k.
 * Inputs @n (and @m in the fallback) are overwritten.
 */
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}
575
gen_bsl1n(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)576 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
577 uint32_t a, uint32_t oprsz, uint32_t maxsz)
578 {
579 static const GVecGen4 op = {
580 .fni8 = gen_bsl1n_i64,
581 .fniv = gen_bsl1n_vec,
582 .fno = gen_helper_sve2_bsl1n,
583 .vece = MO_64,
584 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
585 };
586 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
587 }
588
TRANS_FEAT(BSL1N,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_bsl1n,a)589 TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
590
/*
 * 64-bit expansion of BSL2N.  Both @n and @m are overwritten with
 * intermediate values.
 */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     *
     * With a host orc instruction, form (m | k) and let orc supply the
     * inversion; otherwise use nor to produce ~(m | k) directly.
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}
606
/*
 * Host-vector expansion of BSL2N.  The fallback branch computes
 * d = (n & k) | ~(m | k) explicitly; when the host has a bitsel
 * instruction, @m is inverted first and bitsel selects between @n and
 * the inverted @m under control of @k.
 * Input @m (and @n in the fallback) is overwritten.
 */
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}
619
gen_bsl2n(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)620 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
621 uint32_t a, uint32_t oprsz, uint32_t maxsz)
622 {
623 static const GVecGen4 op = {
624 .fni8 = gen_bsl2n_i64,
625 .fniv = gen_bsl2n_vec,
626 .fno = gen_helper_sve2_bsl2n,
627 .vece = MO_64,
628 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
629 };
630 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
631 }
632
TRANS_FEAT(BSL2N,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_bsl2n,a)633 TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
634
/*
 * 64-bit expansion of NBSL: d = ~((n & k) | (m & ~k)).
 * Both @n and @m are overwritten with intermediate values.
 */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);      /* m = m & ~k */
    tcg_gen_nor_i64(d, n, m);
}
641
/*
 * Host-vector expansion of NBSL: a bitsel of @n and @m under control
 * of @k, followed by an inversion of the result.  Unlike the BSL1N and
 * BSL2N vector expanders, bitsel is used unconditionally here.
 */
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}
648
gen_nbsl(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)649 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
650 uint32_t a, uint32_t oprsz, uint32_t maxsz)
651 {
652 static const GVecGen4 op = {
653 .fni8 = gen_nbsl_i64,
654 .fniv = gen_nbsl_vec,
655 .fno = gen_helper_sve2_nbsl,
656 .vece = MO_64,
657 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
658 };
659 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
660 }
661
TRANS_FEAT(NBSL,aa64_sve2,gen_gvec_fn_arg_zzzz,gen_nbsl,a)662 TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
663
664 /*
665 *** SVE Integer Arithmetic - Unpredicated Group
666 */
667
/*
 * Unpredicated add/subtract, plain and saturating (signed and
 * unsigned), each expanded inline with the matching generic gvec
 * operation via gen_gvec_fn_arg_zzz.
 */
TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
674
675 /*
676 *** SVE Integer Arithmetic - Binary Predicated Group
677 */
678
679 /* Select active elememnts from Zn and inactive elements from Zm,
680 * storing the result in Zd.
681 */
682 static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
683 {
684 static gen_helper_gvec_4 * const fns[4] = {
685 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
686 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
687 };
688 return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
689 }
690
691 #define DO_ZPZZ(NAME, FEAT, name) \
692 static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
693 gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
694 gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
695 }; \
696 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
697 name##_zpzz_fns[a->esz], a, 0)
698
699 DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
700 DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
701 DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
702 DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
703
704 DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
705 DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
706
707 DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
708 DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
709 DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
710 DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
711 DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
712 DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
713
714 DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
715 DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
716 DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
717
718 DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
719 DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
720 DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
721
722 static gen_helper_gvec_4 * const sdiv_fns[4] = {
723 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
724 };
725 TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
726
727 static gen_helper_gvec_4 * const udiv_fns[4] = {
728 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
729 };
730 TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
731
732 TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
733
734 /*
735 *** SVE Integer Arithmetic - Unary Predicated Group
736 */
737
738 #define DO_ZPZ(NAME, FEAT, name) \
739 static gen_helper_gvec_3 * const name##_fns[4] = { \
740 gen_helper_##name##_b, gen_helper_##name##_h, \
741 gen_helper_##name##_s, gen_helper_##name##_d, \
742 }; \
743 TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
744
745 DO_ZPZ(CLS, aa64_sve, sve_cls)
746 DO_ZPZ(CLZ, aa64_sve, sve_clz)
747 DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
748 DO_ZPZ(CNOT, aa64_sve, sve_cnot)
749 DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
750 DO_ZPZ(ABS, aa64_sve, sve_abs)
751 DO_ZPZ(NEG, aa64_sve, sve_neg)
752 DO_ZPZ(RBIT, aa64_sve, sve_rbit)
753
754 static gen_helper_gvec_3 * const fabs_fns[4] = {
755 NULL, gen_helper_sve_fabs_h,
756 gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
757 };
758 TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
759
760 static gen_helper_gvec_3 * const fneg_fns[4] = {
761 NULL, gen_helper_sve_fneg_h,
762 gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
763 };
764 TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
765
766 static gen_helper_gvec_3 * const sxtb_fns[4] = {
767 NULL, gen_helper_sve_sxtb_h,
768 gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
769 };
770 TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
771
772 static gen_helper_gvec_3 * const uxtb_fns[4] = {
773 NULL, gen_helper_sve_uxtb_h,
774 gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
775 };
776 TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
777
778 static gen_helper_gvec_3 * const sxth_fns[4] = {
779 NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
780 };
781 TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
782
783 static gen_helper_gvec_3 * const uxth_fns[4] = {
784 NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
785 };
786 TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
787
788 TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
789 a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
790 TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
791 a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
792
793 /*
794 *** SVE Integer Reduction Group
795 */
796
797 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
do_vpz_ool(DisasContext * s,arg_rpr_esz * a,gen_helper_gvec_reduc * fn)798 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
799 gen_helper_gvec_reduc *fn)
800 {
801 unsigned vsz = vec_full_reg_size(s);
802 TCGv_ptr t_zn, t_pg;
803 TCGv_i32 desc;
804 TCGv_i64 temp;
805
806 if (fn == NULL) {
807 return false;
808 }
809 if (!sve_access_check(s)) {
810 return true;
811 }
812
813 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
814 temp = tcg_temp_new_i64();
815 t_zn = tcg_temp_new_ptr();
816 t_pg = tcg_temp_new_ptr();
817
818 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
819 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
820 fn(temp, t_zn, t_pg, desc);
821
822 write_fp_dreg(s, a->rd, temp);
823 return true;
824 }
825
826 #define DO_VPZ(NAME, name) \
827 static gen_helper_gvec_reduc * const name##_fns[4] = { \
828 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
829 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
830 }; \
831 TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])
832
833 DO_VPZ(ORV, orv)
834 DO_VPZ(ANDV, andv)
835 DO_VPZ(EORV, eorv)
836
837 DO_VPZ(UADDV, uaddv)
838 DO_VPZ(SMAXV, smaxv)
839 DO_VPZ(UMAXV, umaxv)
840 DO_VPZ(SMINV, sminv)
841 DO_VPZ(UMINV, uminv)
842
843 static gen_helper_gvec_reduc * const saddv_fns[4] = {
844 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
845 gen_helper_sve_saddv_s, NULL
846 };
TRANS_FEAT(SADDV,aa64_sve,do_vpz_ool,a,saddv_fns[a->esz])847 TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
848
849 #undef DO_VPZ
850
851 /*
852 *** SVE Shift by Immediate - Predicated Group
853 */
854
855 /*
856 * Copy Zn into Zd, storing zeros into inactive elements.
857 * If invert, store zeros into the active elements.
858 */
859 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
860 int esz, bool invert)
861 {
862 static gen_helper_gvec_3 * const fns[4] = {
863 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
864 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
865 };
866 return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
867 }
868
/*
 * Expand a predicated shift by immediate, picking the helper from @fns
 * by element size.  Returns false for an invalid tsz encoding
 * (esz < 0, see tszimm_esz).
 *
 * A shift count equal to the element size is architecturally valid:
 *  - with @asr set it behaves like a shift by one less, so a->imm is
 *    clamped in place;
 *  - otherwise (logical shifts and ASRD) it is a zeroing operation,
 *    expanded through do_movz_zpz with invert set.
 */
static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int esize_bits;

    if (a->esz < 0) {
        return false;
    }

    esize_bits = 8 << a->esz;
    if (a->imm >= esize_bits) {
        if (!asr) {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
        a->imm = esize_bits - 1;
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
894
895 static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
896 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
897 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
898 };
899 TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)
900
901 static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
902 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
903 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
904 };
905 TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)
906
907 static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
908 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
909 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
910 };
911 TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)
912
913 static gen_helper_gvec_3 * const asrd_fns[4] = {
914 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
915 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
916 };
917 TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)
918
919 static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
920 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
921 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
922 };
923 TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
924 a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)
925
926 static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
927 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
928 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
929 };
930 TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
931 a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)
932
933 static gen_helper_gvec_3 * const srshr_fns[4] = {
934 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
935 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
936 };
937 TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
938 a->esz < 0 ? NULL : srshr_fns[a->esz], a)
939
940 static gen_helper_gvec_3 * const urshr_fns[4] = {
941 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
942 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
943 };
944 TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
945 a->esz < 0 ? NULL : urshr_fns[a->esz], a)
946
947 static gen_helper_gvec_3 * const sqshlu_fns[4] = {
948 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
949 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
950 };
951 TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
952 a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
953
954 /*
955 *** SVE Bitwise Shift - Predicated Group
956 */
957
/*
 * DO_ZPZW(NAME, name) emits two things for one shift mnemonic:
 *  - name##_zpzw_fns[], the helper table indexed by element size; the
 *    last slot is NULL because no _d helper is listed for this form;
 *  - the TRANS_FEAT entry NAME##_zpzw, which passes NULL rather than
 *    indexing the table when a->esz is negative.
 * The macro is only needed for the three expansions below and is
 * undefined again right after them.
 */
#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {               \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s, NULL                              \
    };                                                                    \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,              \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
971
972 /*
973 *** SVE Bitwise Shift - Unpredicated Group
974 */
975
976 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
977 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
978 int64_t, uint32_t, uint32_t))
979 {
980 if (a->esz < 0) {
981 /* Invalid tsz encoding -- see tszimm_esz. */
982 return false;
983 }
984 if (sve_access_check(s)) {
985 unsigned vsz = vec_full_reg_size(s);
986 /* Shift by element size is architecturally valid. For
987 arithmetic right-shift, it's the same as by one less.
988 Otherwise it is a zeroing operation. */
989 if (a->imm >= 8 << a->esz) {
990 if (asr) {
991 a->imm = (8 << a->esz) - 1;
992 } else {
993 do_dupi_z(s, a->rd, 0);
994 return true;
995 }
996 }
997 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
998 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
999 }
1000 return true;
1001 }
1002
TRANS_FEAT(ASR_zzi,aa64_sve,do_shift_imm,a,true,tcg_gen_gvec_sari)1003 TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
1004 TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
1005 TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
1006
/*
 * DO_ZZW(NAME, name) emits, for one unpredicated wide shift:
 *  - name##_zzw_fns[], the helper table indexed by a->esz, whose last
 *    slot is NULL because no _d helper is listed for this form;
 *  - the TRANS_FEAT entry NAME, which forwards the selected table entry
 *    to gen_gvec_ool_arg_zzz.
 * The macro is undefined again once the three shifts are expanded.
 */
#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {                \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                      \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
1020
1021 /*
1022 *** SVE Integer Multiply-Add Group
1023 */
1024
1025 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1026 gen_helper_gvec_5 *fn)
1027 {
1028 if (sve_access_check(s)) {
1029 unsigned vsz = vec_full_reg_size(s);
1030 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1031 vec_full_reg_offset(s, a->ra),
1032 vec_full_reg_offset(s, a->rn),
1033 vec_full_reg_offset(s, a->rm),
1034 pred_full_reg_offset(s, a->pg),
1035 vsz, vsz, 0, fn);
1036 }
1037 return true;
1038 }
1039
1040 static gen_helper_gvec_5 * const mla_fns[4] = {
1041 gen_helper_sve_mla_b, gen_helper_sve_mla_h,
1042 gen_helper_sve_mla_s, gen_helper_sve_mla_d,
1043 };
1044 TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])
1045
1046 static gen_helper_gvec_5 * const mls_fns[4] = {
1047 gen_helper_sve_mls_b, gen_helper_sve_mls_h,
1048 gen_helper_sve_mls_s, gen_helper_sve_mls_d,
1049 };
TRANS_FEAT(MLS,aa64_sve,do_zpzzz_ool,a,mls_fns[a->esz])1050 TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
1051
1052 /*
1053 *** SVE Index Generation Group
1054 */
1055
1056 static bool do_index(DisasContext *s, int esz, int rd,
1057 TCGv_i64 start, TCGv_i64 incr)
1058 {
1059 unsigned vsz;
1060 TCGv_i32 desc;
1061 TCGv_ptr t_zd;
1062
1063 if (!sve_access_check(s)) {
1064 return true;
1065 }
1066
1067 vsz = vec_full_reg_size(s);
1068 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1069 t_zd = tcg_temp_new_ptr();
1070
1071 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
1072 if (esz == 3) {
1073 gen_helper_sve_index_d(t_zd, start, incr, desc);
1074 } else {
1075 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
1076 static index_fn * const fns[3] = {
1077 gen_helper_sve_index_b,
1078 gen_helper_sve_index_h,
1079 gen_helper_sve_index_s,
1080 };
1081 TCGv_i32 s32 = tcg_temp_new_i32();
1082 TCGv_i32 i32 = tcg_temp_new_i32();
1083
1084 tcg_gen_extrl_i64_i32(s32, start);
1085 tcg_gen_extrl_i64_i32(i32, incr);
1086 fns[esz](t_zd, s32, i32, desc);
1087 }
1088 return true;
1089 }
1090
1091 TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
1092 tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
1093 TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
1094 tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
1095 TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
1096 cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
1097 TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
1098 cpu_reg(s, a->rn), cpu_reg(s, a->rm))
1099
1100 /*
1101 *** SVE Stack Allocation Group
1102 */
1103
trans_ADDVL(DisasContext * s,arg_ADDVL * a)1104 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
1105 {
1106 if (!dc_isar_feature(aa64_sve, s)) {
1107 return false;
1108 }
1109 if (sve_access_check(s)) {
1110 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1111 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1112 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1113 }
1114 return true;
1115 }
1116
trans_ADDSVL(DisasContext * s,arg_ADDSVL * a)1117 static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
1118 {
1119 if (!dc_isar_feature(aa64_sme, s)) {
1120 return false;
1121 }
1122 if (sme_enabled_check(s)) {
1123 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1124 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1125 tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
1126 }
1127 return true;
1128 }
1129
trans_ADDPL(DisasContext * s,arg_ADDPL * a)1130 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
1131 {
1132 if (!dc_isar_feature(aa64_sve, s)) {
1133 return false;
1134 }
1135 if (sve_access_check(s)) {
1136 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1137 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1138 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1139 }
1140 return true;
1141 }
1142
trans_ADDSPL(DisasContext * s,arg_ADDSPL * a)1143 static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
1144 {
1145 if (!dc_isar_feature(aa64_sme, s)) {
1146 return false;
1147 }
1148 if (sme_enabled_check(s)) {
1149 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1150 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1151 tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
1152 }
1153 return true;
1154 }
1155
trans_RDVL(DisasContext * s,arg_RDVL * a)1156 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
1157 {
1158 if (!dc_isar_feature(aa64_sve, s)) {
1159 return false;
1160 }
1161 if (sve_access_check(s)) {
1162 TCGv_i64 reg = cpu_reg(s, a->rd);
1163 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1164 }
1165 return true;
1166 }
1167
trans_RDSVL(DisasContext * s,arg_RDSVL * a)1168 static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
1169 {
1170 if (!dc_isar_feature(aa64_sme, s)) {
1171 return false;
1172 }
1173 if (sme_enabled_check(s)) {
1174 TCGv_i64 reg = cpu_reg(s, a->rd);
1175 tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
1176 }
1177 return true;
1178 }
1179
1180 /*
1181 *** SVE Compute Vector Address Group
1182 */
1183
/*
 * Shared expander for the ADR forms: forwards the three vector register
 * numbers and the immediate held in @a, together with the form-specific
 * helper @fn, to gen_gvec_ool_zzz and returns that call's result.
 */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

/* Every ADR variant is registered via TRANS_FEAT_NONSTREAMING. */
TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
1193
1194 /*
1195 *** SVE Integer Misc - Unpredicated Group
1196 */
1197
1198 static gen_helper_gvec_2 * const fexpa_fns[4] = {
1199 NULL, gen_helper_sve_fexpa_h,
1200 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
1201 };
1202 TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
1203 fexpa_fns[a->esz], a->rd, a->rn, 0)
1204
1205 static gen_helper_gvec_3 * const ftssel_fns[4] = {
1206 NULL, gen_helper_sve_ftssel_h,
1207 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
1208 };
1209 TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
1210 ftssel_fns[a->esz], a, 0)
1211
1212 /*
1213 *** SVE Predicate Logical Operations Group
1214 */
1215
do_pppp_flags(DisasContext * s,arg_rprr_s * a,const GVecGen4 * gvec_op)1216 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1217 const GVecGen4 *gvec_op)
1218 {
1219 if (!sve_access_check(s)) {
1220 return true;
1221 }
1222
1223 unsigned psz = pred_gvec_reg_size(s);
1224 int dofs = pred_full_reg_offset(s, a->rd);
1225 int nofs = pred_full_reg_offset(s, a->rn);
1226 int mofs = pred_full_reg_offset(s, a->rm);
1227 int gofs = pred_full_reg_offset(s, a->pg);
1228
1229 if (!a->s) {
1230 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1231 return true;
1232 }
1233
1234 if (psz == 8) {
1235 /* Do the operation and the flags generation in temps. */
1236 TCGv_i64 pd = tcg_temp_new_i64();
1237 TCGv_i64 pn = tcg_temp_new_i64();
1238 TCGv_i64 pm = tcg_temp_new_i64();
1239 TCGv_i64 pg = tcg_temp_new_i64();
1240
1241 tcg_gen_ld_i64(pn, tcg_env, nofs);
1242 tcg_gen_ld_i64(pm, tcg_env, mofs);
1243 tcg_gen_ld_i64(pg, tcg_env, gofs);
1244
1245 gvec_op->fni8(pd, pn, pm, pg);
1246 tcg_gen_st_i64(pd, tcg_env, dofs);
1247
1248 do_predtest1(pd, pg);
1249 } else {
1250 /* The operation and flags generation is large. The computation
1251 * of the flags depends on the original contents of the guarding
1252 * predicate. If the destination overwrites the guarding predicate,
1253 * then the easiest way to get this right is to save a copy.
1254 */
1255 int tofs = gofs;
1256 if (a->rd == a->pg) {
1257 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1258 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1259 }
1260
1261 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1262 do_predtest(s, dofs, tofs, psz / 8);
1263 }
1264 return true;
1265 }
1266
gen_and_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1267 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1268 {
1269 tcg_gen_and_i64(pd, pn, pm);
1270 tcg_gen_and_i64(pd, pd, pg);
1271 }
1272
gen_and_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1273 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1274 TCGv_vec pm, TCGv_vec pg)
1275 {
1276 tcg_gen_and_vec(vece, pd, pn, pm);
1277 tcg_gen_and_vec(vece, pd, pd, pg);
1278 }
1279
trans_AND_pppp(DisasContext * s,arg_rprr_s * a)1280 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1281 {
1282 static const GVecGen4 op = {
1283 .fni8 = gen_and_pg_i64,
1284 .fniv = gen_and_pg_vec,
1285 .fno = gen_helper_sve_and_pppp,
1286 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1287 };
1288
1289 if (!dc_isar_feature(aa64_sve, s)) {
1290 return false;
1291 }
1292 if (!a->s) {
1293 if (a->rn == a->rm) {
1294 if (a->pg == a->rn) {
1295 return do_mov_p(s, a->rd, a->rn);
1296 }
1297 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1298 } else if (a->pg == a->rn || a->pg == a->rm) {
1299 return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1300 }
1301 }
1302 return do_pppp_flags(s, a, &op);
1303 }
1304
gen_bic_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1305 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1306 {
1307 tcg_gen_andc_i64(pd, pn, pm);
1308 tcg_gen_and_i64(pd, pd, pg);
1309 }
1310
gen_bic_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1311 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1312 TCGv_vec pm, TCGv_vec pg)
1313 {
1314 tcg_gen_andc_vec(vece, pd, pn, pm);
1315 tcg_gen_and_vec(vece, pd, pd, pg);
1316 }
1317
trans_BIC_pppp(DisasContext * s,arg_rprr_s * a)1318 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1319 {
1320 static const GVecGen4 op = {
1321 .fni8 = gen_bic_pg_i64,
1322 .fniv = gen_bic_pg_vec,
1323 .fno = gen_helper_sve_bic_pppp,
1324 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1325 };
1326
1327 if (!dc_isar_feature(aa64_sve, s)) {
1328 return false;
1329 }
1330 if (!a->s && a->pg == a->rn) {
1331 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1332 }
1333 return do_pppp_flags(s, a, &op);
1334 }
1335
gen_eor_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1336 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1337 {
1338 tcg_gen_xor_i64(pd, pn, pm);
1339 tcg_gen_and_i64(pd, pd, pg);
1340 }
1341
gen_eor_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1342 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1343 TCGv_vec pm, TCGv_vec pg)
1344 {
1345 tcg_gen_xor_vec(vece, pd, pn, pm);
1346 tcg_gen_and_vec(vece, pd, pd, pg);
1347 }
1348
trans_EOR_pppp(DisasContext * s,arg_rprr_s * a)1349 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1350 {
1351 static const GVecGen4 op = {
1352 .fni8 = gen_eor_pg_i64,
1353 .fniv = gen_eor_pg_vec,
1354 .fno = gen_helper_sve_eor_pppp,
1355 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1356 };
1357
1358 if (!dc_isar_feature(aa64_sve, s)) {
1359 return false;
1360 }
1361 /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
1362 if (!a->s && a->pg == a->rm) {
1363 return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
1364 }
1365 return do_pppp_flags(s, a, &op);
1366 }
1367
trans_SEL_pppp(DisasContext * s,arg_rprr_s * a)1368 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1369 {
1370 if (a->s || !dc_isar_feature(aa64_sve, s)) {
1371 return false;
1372 }
1373 if (sve_access_check(s)) {
1374 unsigned psz = pred_gvec_reg_size(s);
1375 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1376 pred_full_reg_offset(s, a->pg),
1377 pred_full_reg_offset(s, a->rn),
1378 pred_full_reg_offset(s, a->rm), psz, psz);
1379 }
1380 return true;
1381 }
1382
gen_orr_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1383 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1384 {
1385 tcg_gen_or_i64(pd, pn, pm);
1386 tcg_gen_and_i64(pd, pd, pg);
1387 }
1388
gen_orr_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1389 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1390 TCGv_vec pm, TCGv_vec pg)
1391 {
1392 tcg_gen_or_vec(vece, pd, pn, pm);
1393 tcg_gen_and_vec(vece, pd, pd, pg);
1394 }
1395
trans_ORR_pppp(DisasContext * s,arg_rprr_s * a)1396 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1397 {
1398 static const GVecGen4 op = {
1399 .fni8 = gen_orr_pg_i64,
1400 .fniv = gen_orr_pg_vec,
1401 .fno = gen_helper_sve_orr_pppp,
1402 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1403 };
1404
1405 if (!dc_isar_feature(aa64_sve, s)) {
1406 return false;
1407 }
1408 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1409 return do_mov_p(s, a->rd, a->rn);
1410 }
1411 return do_pppp_flags(s, a, &op);
1412 }
1413
gen_orn_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1414 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1415 {
1416 tcg_gen_orc_i64(pd, pn, pm);
1417 tcg_gen_and_i64(pd, pd, pg);
1418 }
1419
gen_orn_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1420 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1421 TCGv_vec pm, TCGv_vec pg)
1422 {
1423 tcg_gen_orc_vec(vece, pd, pn, pm);
1424 tcg_gen_and_vec(vece, pd, pd, pg);
1425 }
1426
trans_ORN_pppp(DisasContext * s,arg_rprr_s * a)1427 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1428 {
1429 static const GVecGen4 op = {
1430 .fni8 = gen_orn_pg_i64,
1431 .fniv = gen_orn_pg_vec,
1432 .fno = gen_helper_sve_orn_pppp,
1433 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1434 };
1435
1436 if (!dc_isar_feature(aa64_sve, s)) {
1437 return false;
1438 }
1439 return do_pppp_flags(s, a, &op);
1440 }
1441
gen_nor_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1442 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1443 {
1444 tcg_gen_or_i64(pd, pn, pm);
1445 tcg_gen_andc_i64(pd, pg, pd);
1446 }
1447
gen_nor_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1448 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1449 TCGv_vec pm, TCGv_vec pg)
1450 {
1451 tcg_gen_or_vec(vece, pd, pn, pm);
1452 tcg_gen_andc_vec(vece, pd, pg, pd);
1453 }
1454
trans_NOR_pppp(DisasContext * s,arg_rprr_s * a)1455 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1456 {
1457 static const GVecGen4 op = {
1458 .fni8 = gen_nor_pg_i64,
1459 .fniv = gen_nor_pg_vec,
1460 .fno = gen_helper_sve_nor_pppp,
1461 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1462 };
1463
1464 if (!dc_isar_feature(aa64_sve, s)) {
1465 return false;
1466 }
1467 return do_pppp_flags(s, a, &op);
1468 }
1469
gen_nand_pg_i64(TCGv_i64 pd,TCGv_i64 pn,TCGv_i64 pm,TCGv_i64 pg)1470 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1471 {
1472 tcg_gen_and_i64(pd, pn, pm);
1473 tcg_gen_andc_i64(pd, pg, pd);
1474 }
1475
gen_nand_pg_vec(unsigned vece,TCGv_vec pd,TCGv_vec pn,TCGv_vec pm,TCGv_vec pg)1476 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1477 TCGv_vec pm, TCGv_vec pg)
1478 {
1479 tcg_gen_and_vec(vece, pd, pn, pm);
1480 tcg_gen_andc_vec(vece, pd, pg, pd);
1481 }
1482
trans_NAND_pppp(DisasContext * s,arg_rprr_s * a)1483 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1484 {
1485 static const GVecGen4 op = {
1486 .fni8 = gen_nand_pg_i64,
1487 .fniv = gen_nand_pg_vec,
1488 .fno = gen_helper_sve_nand_pppp,
1489 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1490 };
1491
1492 if (!dc_isar_feature(aa64_sve, s)) {
1493 return false;
1494 }
1495 return do_pppp_flags(s, a, &op);
1496 }
1497
1498 /*
1499 *** SVE Predicate Misc Group
1500 */
1501
trans_PTEST(DisasContext * s,arg_PTEST * a)1502 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1503 {
1504 if (!dc_isar_feature(aa64_sve, s)) {
1505 return false;
1506 }
1507 if (sve_access_check(s)) {
1508 int nofs = pred_full_reg_offset(s, a->rn);
1509 int gofs = pred_full_reg_offset(s, a->pg);
1510 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1511
1512 if (words == 1) {
1513 TCGv_i64 pn = tcg_temp_new_i64();
1514 TCGv_i64 pg = tcg_temp_new_i64();
1515
1516 tcg_gen_ld_i64(pn, tcg_env, nofs);
1517 tcg_gen_ld_i64(pg, tcg_env, gofs);
1518 do_predtest1(pn, pg);
1519 } else {
1520 do_predtest(s, nofs, gofs, words);
1521 }
1522 }
1523 return true;
1524 }
1525
/*
 * Number of elements selected by a predicate constraint @pattern, given
 * a register of @fullsz units holding elements of size 1 << @esz.
 * See the ARM pseudocode DecodePredCount.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the element count itself. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        bound = 16 << (pattern - 9);
    } else {
        switch (pattern) {
        case 0x0:  /* POW2 */
            return pow2floor(elements);
        case 0x1d: /* MUL4 */
            return elements - elements % 4;
        case 0x1e: /* MUL3 */
            return elements - elements % 3;
        case 0x1f: /* ALL */
            return elements;
        default:   /* #uimm5 */
            return 0;
        }
    }

    /* A fixed count applies only if that many elements exist. */
    return elements >= bound ? bound : 0;
}
1563
/*
 * This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 *
 * @esz:     element size, used to index pred_esz_masks
 * @rd:      destination predicate register number
 * @pat:     predicate constraint pattern, decoded by decode_pred_count
 * @setflag: also write NF/CF/VF/ZF from the stored pattern
 *
 * Always returns true; nothing is emitted if the access check fails.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /*
     * Determine what we must store into each bit, and how many.
     * "word" is the value for every completely covered 64-bit word,
     * "lastword" the value for the final, possibly partial, one.
     */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Keep only the low bits that fall inside the set range. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* Everything fits in one 64-bit word: a single store suffices. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial word: try a single gvec dup of the pattern. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        /* Only usable if size_for_gvec left the operation size unchanged. */
        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* From here on both sizes are divided by 8 to match the 8-byte stores. */
    setsz /= 8;
    fullsz /= 8;

    /* Full words first ... */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, tcg_env, ofs + i);
    }
    /* ... then the partial word, if there is one ... */
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, tcg_env, ofs + i);
        i += 8;
    }
    /* ... and zeros for whatever remains of the register. */
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, tcg_env, ofs + i);
        }
    }

 done:
    /* PTRUES: the flags are translate-time constants derived from word. */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements.  */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
                        do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements.  */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
1650
trans_RDFFR_p(DisasContext * s,arg_RDFFR_p * a)1651 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1652 {
1653 /* The path through do_pppp_flags is complicated enough to want to avoid
1654 * duplication. Frob the arguments into the form of a predicated AND.
1655 */
1656 arg_rprr_s alt_a = {
1657 .rd = a->rd, .pg = a->pg, .s = a->s,
1658 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1659 };
1660
1661 s->is_nonstreaming = true;
1662 return trans_AND_pppp(s, &alt_a);
1663 }
1664
1665 TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
1666 TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
1667
do_pfirst_pnext(DisasContext * s,arg_rr_esz * a,void (* gen_fn)(TCGv_i32,TCGv_ptr,TCGv_ptr,TCGv_i32))1668 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1669 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1670 TCGv_ptr, TCGv_i32))
1671 {
1672 if (!sve_access_check(s)) {
1673 return true;
1674 }
1675
1676 TCGv_ptr t_pd = tcg_temp_new_ptr();
1677 TCGv_ptr t_pg = tcg_temp_new_ptr();
1678 TCGv_i32 t;
1679 unsigned desc = 0;
1680
1681 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
1682 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
1683
1684 tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd));
1685 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn));
1686 t = tcg_temp_new_i32();
1687
1688 gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
1689
1690 do_pred_flags(t);
1691 return true;
1692 }
1693
TRANS_FEAT(PFIRST,aa64_sve,do_pfirst_pnext,a,gen_helper_sve_pfirst)1694 TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
1695 TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
1696
1697 /*
1698 *** SVE Element Count Group
1699 */
1700
1701 /* Perform an inline saturating addition of a 32-bit value within
1702 * a 64-bit register. The second operand is known to be positive,
1703 * which halves the comparisons we must perform to bound the result.
1704 */
1705 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1706 {
1707 int64_t ibound;
1708
1709 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1710 if (u) {
1711 tcg_gen_ext32u_i64(reg, reg);
1712 } else {
1713 tcg_gen_ext32s_i64(reg, reg);
1714 }
1715 if (d) {
1716 tcg_gen_sub_i64(reg, reg, val);
1717 ibound = (u ? 0 : INT32_MIN);
1718 tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
1719 } else {
1720 tcg_gen_add_i64(reg, reg, val);
1721 ibound = (u ? UINT32_MAX : INT32_MAX);
1722 tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
1723 }
1724 }
1725
/*
 * Inline saturating add (@d false) or subtract (@d true) of @val to/from
 * the full 64-bit value in @reg; @u selects unsigned saturation.
 * There is no wider type to compute in, so overflow is detected from
 * the operands and the wrapped result, and the result is chosen with
 * a movcond.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* reg = (reg <u val) ? 0 : reg - val */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* reg = (reg + val <u reg) ? all-ones : reg + val */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /*
             * Detect signed overflow for subtraction: the sign bit of
             * t0 ends up set when the operands differ in sign and the
             * difference (t1) differs in sign from the original reg.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result: INT64_MIN on overflow, else t1. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /*
             * Detect signed overflow for addition: the sign bit of t0
             * ends up set when the operands agree in sign and the sum
             * (now in reg) differs in sign from val.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result: INT64_MAX on overflow, else the sum. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
    }
}
1769
1770 /* Similarly with a vector and a scalar operand. */
do_sat_addsub_vec(DisasContext * s,int esz,int rd,int rn,TCGv_i64 val,bool u,bool d)1771 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1772 TCGv_i64 val, bool u, bool d)
1773 {
1774 unsigned vsz = vec_full_reg_size(s);
1775 TCGv_ptr dptr, nptr;
1776 TCGv_i32 t32, desc;
1777 TCGv_i64 t64;
1778
1779 dptr = tcg_temp_new_ptr();
1780 nptr = tcg_temp_new_ptr();
1781 tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd));
1782 tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn));
1783 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
1784
1785 switch (esz) {
1786 case MO_8:
1787 t32 = tcg_temp_new_i32();
1788 tcg_gen_extrl_i64_i32(t32, val);
1789 if (d) {
1790 tcg_gen_neg_i32(t32, t32);
1791 }
1792 if (u) {
1793 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1794 } else {
1795 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1796 }
1797 break;
1798
1799 case MO_16:
1800 t32 = tcg_temp_new_i32();
1801 tcg_gen_extrl_i64_i32(t32, val);
1802 if (d) {
1803 tcg_gen_neg_i32(t32, t32);
1804 }
1805 if (u) {
1806 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1807 } else {
1808 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1809 }
1810 break;
1811
1812 case MO_32:
1813 t64 = tcg_temp_new_i64();
1814 if (d) {
1815 tcg_gen_neg_i64(t64, val);
1816 } else {
1817 tcg_gen_mov_i64(t64, val);
1818 }
1819 if (u) {
1820 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1821 } else {
1822 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1823 }
1824 break;
1825
1826 case MO_64:
1827 if (u) {
1828 if (d) {
1829 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1830 } else {
1831 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1832 }
1833 } else if (d) {
1834 t64 = tcg_temp_new_i64();
1835 tcg_gen_neg_i64(t64, val);
1836 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1837 } else {
1838 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1839 }
1840 break;
1841
1842 default:
1843 g_assert_not_reached();
1844 }
1845 }
1846
trans_CNT_r(DisasContext * s,arg_CNT_r * a)1847 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1848 {
1849 if (!dc_isar_feature(aa64_sve, s)) {
1850 return false;
1851 }
1852 if (sve_access_check(s)) {
1853 unsigned fullsz = vec_full_reg_size(s);
1854 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1855 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1856 }
1857 return true;
1858 }
1859
trans_INCDEC_r(DisasContext * s,arg_incdec_cnt * a)1860 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1861 {
1862 if (!dc_isar_feature(aa64_sve, s)) {
1863 return false;
1864 }
1865 if (sve_access_check(s)) {
1866 unsigned fullsz = vec_full_reg_size(s);
1867 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1868 int inc = numelem * a->imm * (a->d ? -1 : 1);
1869 TCGv_i64 reg = cpu_reg(s, a->rd);
1870
1871 tcg_gen_addi_i64(reg, reg, inc);
1872 }
1873 return true;
1874 }
1875
trans_SINCDEC_r_32(DisasContext * s,arg_incdec_cnt * a)1876 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1877 {
1878 if (!dc_isar_feature(aa64_sve, s)) {
1879 return false;
1880 }
1881 if (!sve_access_check(s)) {
1882 return true;
1883 }
1884
1885 unsigned fullsz = vec_full_reg_size(s);
1886 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1887 int inc = numelem * a->imm;
1888 TCGv_i64 reg = cpu_reg(s, a->rd);
1889
1890 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1891 if (inc == 0) {
1892 if (a->u) {
1893 tcg_gen_ext32u_i64(reg, reg);
1894 } else {
1895 tcg_gen_ext32s_i64(reg, reg);
1896 }
1897 } else {
1898 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
1899 }
1900 return true;
1901 }
1902
trans_SINCDEC_r_64(DisasContext * s,arg_incdec_cnt * a)1903 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1904 {
1905 if (!dc_isar_feature(aa64_sve, s)) {
1906 return false;
1907 }
1908 if (!sve_access_check(s)) {
1909 return true;
1910 }
1911
1912 unsigned fullsz = vec_full_reg_size(s);
1913 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1914 int inc = numelem * a->imm;
1915 TCGv_i64 reg = cpu_reg(s, a->rd);
1916
1917 if (inc != 0) {
1918 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
1919 }
1920 return true;
1921 }
1922
trans_INCDEC_v(DisasContext * s,arg_incdec2_cnt * a)1923 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1924 {
1925 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
1926 return false;
1927 }
1928
1929 unsigned fullsz = vec_full_reg_size(s);
1930 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1931 int inc = numelem * a->imm;
1932
1933 if (inc != 0) {
1934 if (sve_access_check(s)) {
1935 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1936 vec_full_reg_offset(s, a->rn),
1937 tcg_constant_i64(a->d ? -inc : inc),
1938 fullsz, fullsz);
1939 }
1940 } else {
1941 do_mov_z(s, a->rd, a->rn);
1942 }
1943 return true;
1944 }
1945
trans_SINCDEC_v(DisasContext * s,arg_incdec2_cnt * a)1946 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1947 {
1948 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
1949 return false;
1950 }
1951
1952 unsigned fullsz = vec_full_reg_size(s);
1953 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1954 int inc = numelem * a->imm;
1955
1956 if (inc != 0) {
1957 if (sve_access_check(s)) {
1958 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
1959 tcg_constant_i64(inc), a->u, a->d);
1960 }
1961 } else {
1962 do_mov_z(s, a->rd, a->rn);
1963 }
1964 return true;
1965 }
1966
1967 /*
1968 *** SVE Bitwise Immediate Group
1969 */
1970
do_zz_dbm(DisasContext * s,arg_rr_dbm * a,GVecGen2iFn * gvec_fn)1971 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1972 {
1973 uint64_t imm;
1974 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1975 extract32(a->dbm, 0, 6),
1976 extract32(a->dbm, 6, 6))) {
1977 return false;
1978 }
1979 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
1980 }
1981
TRANS_FEAT(AND_zzi,aa64_sve,do_zz_dbm,a,tcg_gen_gvec_andi)1982 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
1983 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
1984 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
1985
1986 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1987 {
1988 uint64_t imm;
1989
1990 if (!dc_isar_feature(aa64_sve, s)) {
1991 return false;
1992 }
1993 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1994 extract32(a->dbm, 0, 6),
1995 extract32(a->dbm, 6, 6))) {
1996 return false;
1997 }
1998 if (sve_access_check(s)) {
1999 do_dupi_z(s, a->rd, imm);
2000 }
2001 return true;
2002 }
2003
2004 /*
2005 *** SVE Integer Wide Immediate - Predicated Group
2006 */
2007
2008 /* Implement all merging copies. This is used for CPY (immediate),
2009 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2010 */
do_cpy_m(DisasContext * s,int esz,int rd,int rn,int pg,TCGv_i64 val)2011 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2012 TCGv_i64 val)
2013 {
2014 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2015 static gen_cpy * const fns[4] = {
2016 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2017 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2018 };
2019 unsigned vsz = vec_full_reg_size(s);
2020 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2021 TCGv_ptr t_zd = tcg_temp_new_ptr();
2022 TCGv_ptr t_zn = tcg_temp_new_ptr();
2023 TCGv_ptr t_pg = tcg_temp_new_ptr();
2024
2025 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
2026 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn));
2027 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
2028
2029 fns[esz](t_zd, t_zn, t_pg, val, desc);
2030 }
2031
trans_FCPY(DisasContext * s,arg_FCPY * a)2032 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
2033 {
2034 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2035 return false;
2036 }
2037 if (sve_access_check(s)) {
2038 /* Decode the VFP immediate. */
2039 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2040 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
2041 }
2042 return true;
2043 }
2044
trans_CPY_m_i(DisasContext * s,arg_rpri_esz * a)2045 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
2046 {
2047 if (!dc_isar_feature(aa64_sve, s)) {
2048 return false;
2049 }
2050 if (sve_access_check(s)) {
2051 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
2052 }
2053 return true;
2054 }
2055
trans_CPY_z_i(DisasContext * s,arg_CPY_z_i * a)2056 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
2057 {
2058 static gen_helper_gvec_2i * const fns[4] = {
2059 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2060 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2061 };
2062
2063 if (!dc_isar_feature(aa64_sve, s)) {
2064 return false;
2065 }
2066 if (sve_access_check(s)) {
2067 unsigned vsz = vec_full_reg_size(s);
2068 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2069 pred_full_reg_offset(s, a->pg),
2070 tcg_constant_i64(a->imm),
2071 vsz, vsz, 0, fns[a->esz]);
2072 }
2073 return true;
2074 }
2075
2076 /*
2077 *** SVE Permute Extract Group
2078 */
2079
do_EXT(DisasContext * s,int rd,int rn,int rm,int imm)2080 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
2081 {
2082 if (!sve_access_check(s)) {
2083 return true;
2084 }
2085
2086 unsigned vsz = vec_full_reg_size(s);
2087 unsigned n_ofs = imm >= vsz ? 0 : imm;
2088 unsigned n_siz = vsz - n_ofs;
2089 unsigned d = vec_full_reg_offset(s, rd);
2090 unsigned n = vec_full_reg_offset(s, rn);
2091 unsigned m = vec_full_reg_offset(s, rm);
2092
2093 /* Use host vector move insns if we have appropriate sizes
2094 * and no unfortunate overlap.
2095 */
2096 if (m != d
2097 && n_ofs == size_for_gvec(n_ofs)
2098 && n_siz == size_for_gvec(n_siz)
2099 && (d != n || n_siz <= n_ofs)) {
2100 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2101 if (n_ofs != 0) {
2102 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2103 }
2104 } else {
2105 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2106 }
2107 return true;
2108 }
2109
2110 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2111 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
2112
2113 /*
2114 *** SVE Permute - Unpredicated Group
2115 */
2116
trans_DUP_s(DisasContext * s,arg_DUP_s * a)2117 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2118 {
2119 if (!dc_isar_feature(aa64_sve, s)) {
2120 return false;
2121 }
2122 if (sve_access_check(s)) {
2123 unsigned vsz = vec_full_reg_size(s);
2124 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2125 vsz, vsz, cpu_reg_sp(s, a->rn));
2126 }
2127 return true;
2128 }
2129
trans_DUP_x(DisasContext * s,arg_DUP_x * a)2130 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2131 {
2132 if (!dc_isar_feature(aa64_sve, s)) {
2133 return false;
2134 }
2135 if ((a->imm & 0x1f) == 0) {
2136 return false;
2137 }
2138 if (sve_access_check(s)) {
2139 unsigned vsz = vec_full_reg_size(s);
2140 unsigned dofs = vec_full_reg_offset(s, a->rd);
2141 unsigned esz, index;
2142
2143 esz = ctz32(a->imm);
2144 index = a->imm >> (esz + 1);
2145
2146 if ((index << esz) < vsz) {
2147 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2148 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2149 } else {
2150 /*
2151 * While dup_mem handles 128-bit elements, dup_imm does not.
2152 * Thankfully element size doesn't matter for splatting zero.
2153 */
2154 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2155 }
2156 }
2157 return true;
2158 }
2159
/*
 * Shared expander for INSR: call the sve_insr helper selected by
 * a->esz with pointers to Zd and Zn, the 64-bit scalar VAL, and a
 * simd_desc() built from the full vector size.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    /* Indexed by a->esz. */
    static gen_insr * const helpers[4] = {
        [0] = gen_helper_sve_insr_b,
        [1] = gen_helper_sve_insr_h,
        [2] = gen_helper_sve_insr_s,
        [3] = gen_helper_sve_insr_d,
    };
    const unsigned vl_bytes = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vl_bytes, vl_bytes, 0));
    TCGv_ptr zd_ptr = tcg_temp_new_ptr();
    TCGv_ptr zn_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(zd_ptr, tcg_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn_ptr, tcg_env, vec_full_reg_offset(s, a->rn));

    helpers[a->esz](zd_ptr, zn_ptr, val, desc);
}
2177
trans_INSR_f(DisasContext * s,arg_rrr_esz * a)2178 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2179 {
2180 if (!dc_isar_feature(aa64_sve, s)) {
2181 return false;
2182 }
2183 if (sve_access_check(s)) {
2184 TCGv_i64 t = tcg_temp_new_i64();
2185 tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64));
2186 do_insr_i64(s, a, t);
2187 }
2188 return true;
2189 }
2190
trans_INSR_r(DisasContext * s,arg_rrr_esz * a)2191 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2192 {
2193 if (!dc_isar_feature(aa64_sve, s)) {
2194 return false;
2195 }
2196 if (sve_access_check(s)) {
2197 do_insr_i64(s, a, cpu_reg(s, a->rm));
2198 }
2199 return true;
2200 }
2201
2202 static gen_helper_gvec_2 * const rev_fns[4] = {
2203 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2204 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2205 };
2206 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
2207
2208 static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2209 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2210 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2211 };
2212 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
2213
2214 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2215 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2216 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2217 };
2218 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2219 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
2220
2221 static gen_helper_gvec_3 * const tbx_fns[4] = {
2222 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2223 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2224 };
2225 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
2226
trans_UNPK(DisasContext * s,arg_UNPK * a)2227 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2228 {
2229 static gen_helper_gvec_2 * const fns[4][2] = {
2230 { NULL, NULL },
2231 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2232 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2233 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2234 };
2235
2236 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2237 return false;
2238 }
2239 if (sve_access_check(s)) {
2240 unsigned vsz = vec_full_reg_size(s);
2241 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2242 vec_full_reg_offset(s, a->rn)
2243 + (a->h ? vsz / 2 : 0),
2244 vsz, vsz, 0, fns[a->esz][a->u]);
2245 }
2246 return true;
2247 }
2248
2249 /*
2250 *** SVE Permute - Predicates Group
2251 */
2252
/*
 * Expand a three-operand predicate permute (Pd, Pn, Pm) through
 * helper FN.
 *
 * The descriptor is assembled from PREDDESC fields: OPRSZ is the
 * predicate register size, ESZ is a->esz and DATA carries HIGH_ODD.
 *
 * Always returns true; nothing is emitted if sve_access_check() fails.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    unsigned psz;
    uint32_t desc;
    TCGv_ptr pd, pn, pm;

    if (!sve_access_check(s)) {
        return true;
    }

    psz = pred_full_reg_size(s);
    pd = tcg_temp_new_ptr();
    pn = tcg_temp_new_ptr();
    pm = tcg_temp_new_ptr();

    desc = FIELD_DP32(0, PREDDESC, OPRSZ, psz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(pn, tcg_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pm, tcg_env, pred_full_reg_offset(s, a->rm));

    fn(pd, pn, pm, tcg_constant_i32(desc));
    return true;
}
2278
/*
 * Expand a two-operand predicate permute (Pd, Pn) through helper FN.
 *
 * The descriptor is assembled from PREDDESC fields: OPRSZ is the
 * predicate register size, ESZ is a->esz and DATA carries HIGH_ODD.
 *
 * Always returns true; nothing is emitted if sve_access_check() fails.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    unsigned psz;
    uint32_t desc;
    TCGv_ptr pd, pn;

    if (!sve_access_check(s)) {
        return true;
    }

    psz = pred_full_reg_size(s);
    pd = tcg_temp_new_ptr();
    pn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(pn, tcg_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(0, PREDDESC, OPRSZ, psz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(pd, pn, tcg_constant_i32(desc));
    return true;
}
2301
2302 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2303 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2304 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2305 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2306 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2307 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
2308
2309 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2310 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2311 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
2312
2313 /*
2314 *** SVE Permute - Interleaving Group
2315 */
2316
2317 static gen_helper_gvec_3 * const zip_fns[4] = {
2318 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2319 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2320 };
2321 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2322 zip_fns[a->esz], a, 0)
2323 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2324 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2325
2326 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2327 gen_helper_sve2_zip_q, a, 0)
2328 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2329 gen_helper_sve2_zip_q, a,
2330 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
2331
2332 static gen_helper_gvec_3 * const uzp_fns[4] = {
2333 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2334 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2335 };
2336
2337 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2338 uzp_fns[a->esz], a, 0)
2339 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2340 uzp_fns[a->esz], a, 1 << a->esz)
2341
2342 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2343 gen_helper_sve2_uzp_q, a, 0)
2344 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2345 gen_helper_sve2_uzp_q, a, 16)
2346
2347 static gen_helper_gvec_3 * const trn_fns[4] = {
2348 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2349 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2350 };
2351
2352 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2353 trn_fns[a->esz], a, 0)
2354 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2355 trn_fns[a->esz], a, 1 << a->esz)
2356
2357 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2358 gen_helper_sve2_trn_q, a, 0)
2359 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2360 gen_helper_sve2_trn_q, a, 16)
2361
2362 /*
2363 *** SVE Permute Vector - Predicated Group
2364 */
2365
2366 static gen_helper_gvec_3 * const compact_fns[4] = {
2367 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2368 };
2369 TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
2370 compact_fns[a->esz], a, 0)
2371
2372 /* Call the helper that computes the ARM LastActiveElement pseudocode
2373 * function, scaled by the element size. This includes the not found
2374 * indication; e.g. not found for esz=3 is -8.
2375 */
find_last_active(DisasContext * s,TCGv_i32 ret,int esz,int pg)2376 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2377 {
2378 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2379 * round up, as we do elsewhere, because we need the exact size.
2380 */
2381 TCGv_ptr t_p = tcg_temp_new_ptr();
2382 unsigned desc = 0;
2383
2384 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2385 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2386
2387 tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg));
2388
2389 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
2390 }
2391
2392 /* Increment LAST to the offset of the next element in the vector,
2393 * wrapping around to 0.
2394 */
incr_last_active(DisasContext * s,TCGv_i32 last,int esz)2395 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2396 {
2397 unsigned vsz = vec_full_reg_size(s);
2398
2399 tcg_gen_addi_i32(last, last, 1 << esz);
2400 if (is_power_of_2(vsz)) {
2401 tcg_gen_andi_i32(last, last, vsz - 1);
2402 } else {
2403 TCGv_i32 max = tcg_constant_i32(vsz);
2404 TCGv_i32 zero = tcg_constant_i32(0);
2405 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2406 }
2407 }
2408
2409 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
wrap_last_active(DisasContext * s,TCGv_i32 last,int esz)2410 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2411 {
2412 unsigned vsz = vec_full_reg_size(s);
2413
2414 if (is_power_of_2(vsz)) {
2415 tcg_gen_andi_i32(last, last, vsz - 1);
2416 } else {
2417 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2418 TCGv_i32 zero = tcg_constant_i32(0);
2419 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2420 }
2421 }
2422
2423 /* Load an unsigned element of ESZ from BASE+OFS. */
load_esz(TCGv_ptr base,int ofs,int esz)2424 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2425 {
2426 TCGv_i64 r = tcg_temp_new_i64();
2427
2428 switch (esz) {
2429 case 0:
2430 tcg_gen_ld8u_i64(r, base, ofs);
2431 break;
2432 case 1:
2433 tcg_gen_ld16u_i64(r, base, ofs);
2434 break;
2435 case 2:
2436 tcg_gen_ld32u_i64(r, base, ofs);
2437 break;
2438 case 3:
2439 tcg_gen_ld_i64(r, base, ofs);
2440 break;
2441 default:
2442 g_assert_not_reached();
2443 }
2444 return r;
2445 }
2446
2447 /* Load an unsigned element of ESZ from RM[LAST]. */
load_last_active(DisasContext * s,TCGv_i32 last,int rm,int esz)2448 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2449 int rm, int esz)
2450 {
2451 TCGv_ptr p = tcg_temp_new_ptr();
2452
2453 /* Convert offset into vector into offset into ENV.
2454 * The final adjustment for the vector register base
2455 * is added via constant offset to the load.
2456 */
2457 #if HOST_BIG_ENDIAN
2458 /* Adjust for element ordering. See vec_reg_offset. */
2459 if (esz < 3) {
2460 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2461 }
2462 #endif
2463 tcg_gen_ext_i32_ptr(p, last);
2464 tcg_gen_add_ptr(p, p, tcg_env);
2465
2466 return load_esz(p, vec_full_reg_offset(s, rm), esz);
2467 }
2468
2469 /* Compute CLAST for a Zreg. */
do_clast_vector(DisasContext * s,arg_rprr_esz * a,bool before)2470 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2471 {
2472 TCGv_i32 last;
2473 TCGLabel *over;
2474 TCGv_i64 ele;
2475 unsigned vsz, esz = a->esz;
2476
2477 if (!sve_access_check(s)) {
2478 return true;
2479 }
2480
2481 last = tcg_temp_new_i32();
2482 over = gen_new_label();
2483
2484 find_last_active(s, last, esz, a->pg);
2485
2486 /* There is of course no movcond for a 2048-bit vector,
2487 * so we must branch over the actual store.
2488 */
2489 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2490
2491 if (!before) {
2492 incr_last_active(s, last, esz);
2493 }
2494
2495 ele = load_last_active(s, last, a->rm, esz);
2496
2497 vsz = vec_full_reg_size(s);
2498 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2499
2500 /* If this insn used MOVPRFX, we may need a second move. */
2501 if (a->rd != a->rn) {
2502 TCGLabel *done = gen_new_label();
2503 tcg_gen_br(done);
2504
2505 gen_set_label(over);
2506 do_mov_z(s, a->rd, a->rn);
2507
2508 gen_set_label(done);
2509 } else {
2510 gen_set_label(over);
2511 }
2512 return true;
2513 }
2514
TRANS_FEAT(CLASTA_z,aa64_sve,do_clast_vector,a,false)2515 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2516 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
2517
2518 /* Compute CLAST for a scalar. */
2519 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2520 bool before, TCGv_i64 reg_val)
2521 {
2522 TCGv_i32 last = tcg_temp_new_i32();
2523 TCGv_i64 ele, cmp;
2524
2525 find_last_active(s, last, esz, pg);
2526
2527 /* Extend the original value of last prior to incrementing. */
2528 cmp = tcg_temp_new_i64();
2529 tcg_gen_ext_i32_i64(cmp, last);
2530
2531 if (!before) {
2532 incr_last_active(s, last, esz);
2533 }
2534
2535 /* The conceit here is that while last < 0 indicates not found, after
2536 * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address
2537 * from which we can load garbage. We then discard the garbage with
2538 * a conditional move.
2539 */
2540 ele = load_last_active(s, last, rm, esz);
2541
2542 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2543 ele, reg_val);
2544 }
2545
2546 /* Compute CLAST for a Vreg. */
do_clast_fp(DisasContext * s,arg_rpr_esz * a,bool before)2547 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2548 {
2549 if (sve_access_check(s)) {
2550 int esz = a->esz;
2551 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2552 TCGv_i64 reg = load_esz(tcg_env, ofs, esz);
2553
2554 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2555 write_fp_dreg(s, a->rd, reg);
2556 }
2557 return true;
2558 }
2559
TRANS_FEAT(CLASTA_v,aa64_sve,do_clast_fp,a,false)2560 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2561 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
2562
2563 /* Compute CLAST for a Xreg. */
2564 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2565 {
2566 TCGv_i64 reg;
2567
2568 if (!sve_access_check(s)) {
2569 return true;
2570 }
2571
2572 reg = cpu_reg(s, a->rd);
2573 switch (a->esz) {
2574 case 0:
2575 tcg_gen_ext8u_i64(reg, reg);
2576 break;
2577 case 1:
2578 tcg_gen_ext16u_i64(reg, reg);
2579 break;
2580 case 2:
2581 tcg_gen_ext32u_i64(reg, reg);
2582 break;
2583 case 3:
2584 break;
2585 default:
2586 g_assert_not_reached();
2587 }
2588
2589 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2590 return true;
2591 }
2592
TRANS_FEAT(CLASTA_r,aa64_sve,do_clast_general,a,false)2593 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2594 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
2595
2596 /* Compute LAST for a scalar. */
2597 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2598 int pg, int rm, bool before)
2599 {
2600 TCGv_i32 last = tcg_temp_new_i32();
2601
2602 find_last_active(s, last, esz, pg);
2603 if (before) {
2604 wrap_last_active(s, last, esz);
2605 } else {
2606 incr_last_active(s, last, esz);
2607 }
2608
2609 return load_last_active(s, last, rm, esz);
2610 }
2611
2612 /* Compute LAST for a Vreg. */
do_last_fp(DisasContext * s,arg_rpr_esz * a,bool before)2613 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2614 {
2615 if (sve_access_check(s)) {
2616 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2617 write_fp_dreg(s, a->rd, val);
2618 }
2619 return true;
2620 }
2621
TRANS_FEAT(LASTA_v,aa64_sve,do_last_fp,a,false)2622 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2623 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
2624
2625 /* Compute LAST for a Xreg. */
2626 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2627 {
2628 if (sve_access_check(s)) {
2629 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2630 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2631 }
2632 return true;
2633 }
2634
TRANS_FEAT(LASTA_r,aa64_sve,do_last_general,a,false)2635 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2636 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
2637
2638 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2639 {
2640 if (!dc_isar_feature(aa64_sve, s)) {
2641 return false;
2642 }
2643 if (sve_access_check(s)) {
2644 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2645 }
2646 return true;
2647 }
2648
trans_CPY_m_v(DisasContext * s,arg_rpr_esz * a)2649 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2650 {
2651 if (!dc_isar_feature(aa64_sve, s)) {
2652 return false;
2653 }
2654 if (sve_access_check(s)) {
2655 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2656 TCGv_i64 t = load_esz(tcg_env, ofs, a->esz);
2657 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2658 }
2659 return true;
2660 }
2661
2662 static gen_helper_gvec_3 * const revb_fns[4] = {
2663 NULL, gen_helper_sve_revb_h,
2664 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2665 };
2666 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
2667
2668 static gen_helper_gvec_3 * const revh_fns[4] = {
2669 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2670 };
2671 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
2672
/*
 * REVW has a helper only for esz == 3; NULL is passed for every other
 * element size.
 * NOTE(review): presumably gen_gvec_ool_arg_zpz() rejects a NULL
 * helper -- confirm against its definition.
 */
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)

/* REVD is gated on aa64_sme rather than aa64_sve. */
TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)

/* SPLICE passes the element size as the final argument. */
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
           gen_helper_sve_splice, a, a->esz)

/*
 * The SVE2 form uses the same helper with explicit registers: the
 * sources are a->rn and (a->rn + 1) mod 32.
 */
TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
           a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
2683
2684 /*
2685 *** SVE Integer Compare - Vectors Group
2686 */
2687
do_ppzz_flags(DisasContext * s,arg_rprr_esz * a,gen_helper_gvec_flags_4 * gen_fn)2688 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2689 gen_helper_gvec_flags_4 *gen_fn)
2690 {
2691 TCGv_ptr pd, zn, zm, pg;
2692 unsigned vsz;
2693 TCGv_i32 t;
2694
2695 if (gen_fn == NULL) {
2696 return false;
2697 }
2698 if (!sve_access_check(s)) {
2699 return true;
2700 }
2701
2702 vsz = vec_full_reg_size(s);
2703 t = tcg_temp_new_i32();
2704 pd = tcg_temp_new_ptr();
2705 zn = tcg_temp_new_ptr();
2706 zm = tcg_temp_new_ptr();
2707 pg = tcg_temp_new_ptr();
2708
2709 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
2710 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
2711 tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm));
2712 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
2713
2714 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
2715
2716 do_pred_flags(t);
2717 return true;
2718 }
2719
2720 #define DO_PPZZ(NAME, name) \
2721 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2722 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2723 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2724 }; \
2725 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2726 a, name##_ppzz_fns[a->esz])
2727
DO_PPZZ(CMPEQ,cmpeq)2728 DO_PPZZ(CMPEQ, cmpeq)
2729 DO_PPZZ(CMPNE, cmpne)
2730 DO_PPZZ(CMPGT, cmpgt)
2731 DO_PPZZ(CMPGE, cmpge)
2732 DO_PPZZ(CMPHI, cmphi)
2733 DO_PPZZ(CMPHS, cmphs)
2734
2735 #undef DO_PPZZ
2736
2737 #define DO_PPZW(NAME, name) \
2738 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2739 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2740 gen_helper_sve_##name##_ppzw_s, NULL \
2741 }; \
2742 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2743 a, name##_ppzw_fns[a->esz])
2744
2745 DO_PPZW(CMPEQ, cmpeq)
2746 DO_PPZW(CMPNE, cmpne)
2747 DO_PPZW(CMPGT, cmpgt)
2748 DO_PPZW(CMPGE, cmpge)
2749 DO_PPZW(CMPHI, cmphi)
2750 DO_PPZW(CMPHS, cmphs)
2751 DO_PPZW(CMPLT, cmplt)
2752 DO_PPZW(CMPLE, cmple)
2753 DO_PPZW(CMPLO, cmplo)
2754 DO_PPZW(CMPLS, cmpls)
2755
2756 #undef DO_PPZW
2757
2758 /*
2759 *** SVE Integer Compare - Immediate Groups
2760 */
2761
2762 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2763 gen_helper_gvec_flags_3 *gen_fn)
2764 {
2765 TCGv_ptr pd, zn, pg;
2766 unsigned vsz;
2767 TCGv_i32 t;
2768
2769 if (gen_fn == NULL) {
2770 return false;
2771 }
2772 if (!sve_access_check(s)) {
2773 return true;
2774 }
2775
2776 vsz = vec_full_reg_size(s);
2777 t = tcg_temp_new_i32();
2778 pd = tcg_temp_new_ptr();
2779 zn = tcg_temp_new_ptr();
2780 pg = tcg_temp_new_ptr();
2781
2782 tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
2783 tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
2784 tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
2785
2786 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
2787
2788 do_pred_flags(t);
2789 return true;
2790 }
2791
2792 #define DO_PPZI(NAME, name) \
2793 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
2794 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2795 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2796 }; \
2797 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
2798 name##_ppzi_fns[a->esz])
2799
DO_PPZI(CMPEQ,cmpeq)2800 DO_PPZI(CMPEQ, cmpeq)
2801 DO_PPZI(CMPNE, cmpne)
2802 DO_PPZI(CMPGT, cmpgt)
2803 DO_PPZI(CMPGE, cmpge)
2804 DO_PPZI(CMPHI, cmphi)
2805 DO_PPZI(CMPHS, cmphs)
2806 DO_PPZI(CMPLT, cmplt)
2807 DO_PPZI(CMPLE, cmple)
2808 DO_PPZI(CMPLO, cmplo)
2809 DO_PPZI(CMPLS, cmpls)
2810
2811 #undef DO_PPZI
2812
2813 /*
2814 *** SVE Partition Break Group
2815 */
2816
2817 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2818 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2819 {
2820 if (!sve_access_check(s)) {
2821 return true;
2822 }
2823
2824 unsigned vsz = pred_full_reg_size(s);
2825
2826 /* Predicate sizes may be smaller and cannot use simd_desc. */
2827 TCGv_ptr d = tcg_temp_new_ptr();
2828 TCGv_ptr n = tcg_temp_new_ptr();
2829 TCGv_ptr m = tcg_temp_new_ptr();
2830 TCGv_ptr g = tcg_temp_new_ptr();
2831 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2832
2833 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
2834 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
2835 tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm));
2836 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
2837
2838 if (a->s) {
2839 TCGv_i32 t = tcg_temp_new_i32();
2840 fn_s(t, d, n, m, g, desc);
2841 do_pred_flags(t);
2842 } else {
2843 fn(d, n, m, g, desc);
2844 }
2845 return true;
2846 }
2847
do_brk2(DisasContext * s,arg_rpr_s * a,gen_helper_gvec_3 * fn,gen_helper_gvec_flags_3 * fn_s)2848 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2849 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2850 {
2851 if (!sve_access_check(s)) {
2852 return true;
2853 }
2854
2855 unsigned vsz = pred_full_reg_size(s);
2856
2857 /* Predicate sizes may be smaller and cannot use simd_desc. */
2858 TCGv_ptr d = tcg_temp_new_ptr();
2859 TCGv_ptr n = tcg_temp_new_ptr();
2860 TCGv_ptr g = tcg_temp_new_ptr();
2861 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2862
2863 tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
2864 tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
2865 tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
2866
2867 if (a->s) {
2868 TCGv_i32 t = tcg_temp_new_i32();
2869 fn_s(t, d, n, g, desc);
2870 do_pred_flags(t);
2871 } else {
2872 fn(d, n, g, desc);
2873 }
2874 return true;
2875 }
2876
/*
 * Partition-break translators.  Each entry names two helpers: the
 * first is used when a->s is clear, the second (which also returns a
 * value for do_pred_flags()) when a->s is set.
 */

/* Forms with two source predicates go through do_brk3(). */
TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpa, gen_helper_sve_brkpas)
TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpb, gen_helper_sve_brkpbs)

/* Forms with one source predicate go through do_brk2(). */
TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)

TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)

TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
           gen_helper_sve_brkn, gen_helper_sve_brkns)
2894
2895 /*
2896 *** SVE Predicate Count Group
2897 */
2898
2899 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2900 {
2901 unsigned psz = pred_full_reg_size(s);
2902
2903 if (psz <= 8) {
2904 uint64_t psz_mask;
2905
2906 tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn));
2907 if (pn != pg) {
2908 TCGv_i64 g = tcg_temp_new_i64();
2909 tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg));
2910 tcg_gen_and_i64(val, val, g);
2911 }
2912
2913 /* Reduce the pred_esz_masks value simply to reduce the
2914 * size of the code generated here.
2915 */
2916 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2917 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2918
2919 tcg_gen_ctpop_i64(val, val);
2920 } else {
2921 TCGv_ptr t_pn = tcg_temp_new_ptr();
2922 TCGv_ptr t_pg = tcg_temp_new_ptr();
2923 unsigned desc = 0;
2924
2925 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
2926 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2927
2928 tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn));
2929 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
2930
2931 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
2932 }
2933 }
2934
trans_CNTP(DisasContext * s,arg_CNTP * a)2935 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
2936 {
2937 if (!dc_isar_feature(aa64_sve, s)) {
2938 return false;
2939 }
2940 if (sve_access_check(s)) {
2941 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
2942 }
2943 return true;
2944 }
2945
trans_INCDECP_r(DisasContext * s,arg_incdec_pred * a)2946 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
2947 {
2948 if (!dc_isar_feature(aa64_sve, s)) {
2949 return false;
2950 }
2951 if (sve_access_check(s)) {
2952 TCGv_i64 reg = cpu_reg(s, a->rd);
2953 TCGv_i64 val = tcg_temp_new_i64();
2954
2955 do_cntp(s, val, a->esz, a->pg, a->pg);
2956 if (a->d) {
2957 tcg_gen_sub_i64(reg, reg, val);
2958 } else {
2959 tcg_gen_add_i64(reg, reg, val);
2960 }
2961 }
2962 return true;
2963 }
2964
trans_INCDECP_z(DisasContext * s,arg_incdec2_pred * a)2965 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
2966 {
2967 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
2968 return false;
2969 }
2970 if (sve_access_check(s)) {
2971 unsigned vsz = vec_full_reg_size(s);
2972 TCGv_i64 val = tcg_temp_new_i64();
2973 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
2974
2975 do_cntp(s, val, a->esz, a->pg, a->pg);
2976 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
2977 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
2978 }
2979 return true;
2980 }
2981
trans_SINCDECP_r_32(DisasContext * s,arg_incdec_pred * a)2982 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
2983 {
2984 if (!dc_isar_feature(aa64_sve, s)) {
2985 return false;
2986 }
2987 if (sve_access_check(s)) {
2988 TCGv_i64 reg = cpu_reg(s, a->rd);
2989 TCGv_i64 val = tcg_temp_new_i64();
2990
2991 do_cntp(s, val, a->esz, a->pg, a->pg);
2992 do_sat_addsub_32(reg, val, a->u, a->d);
2993 }
2994 return true;
2995 }
2996
trans_SINCDECP_r_64(DisasContext * s,arg_incdec_pred * a)2997 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
2998 {
2999 if (!dc_isar_feature(aa64_sve, s)) {
3000 return false;
3001 }
3002 if (sve_access_check(s)) {
3003 TCGv_i64 reg = cpu_reg(s, a->rd);
3004 TCGv_i64 val = tcg_temp_new_i64();
3005
3006 do_cntp(s, val, a->esz, a->pg, a->pg);
3007 do_sat_addsub_64(reg, val, a->u, a->d);
3008 }
3009 return true;
3010 }
3011
trans_SINCDECP_z(DisasContext * s,arg_incdec2_pred * a)3012 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3013 {
3014 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3015 return false;
3016 }
3017 if (sve_access_check(s)) {
3018 TCGv_i64 val = tcg_temp_new_i64();
3019 do_cntp(s, val, a->esz, a->pg, a->pg);
3020 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3021 }
3022 return true;
3023 }
3024
3025 /*
3026 *** SVE Integer Compare Scalars Group
3027 */
3028
trans_CTERM(DisasContext * s,arg_CTERM * a)3029 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3030 {
3031 if (!dc_isar_feature(aa64_sve, s)) {
3032 return false;
3033 }
3034 if (!sve_access_check(s)) {
3035 return true;
3036 }
3037
3038 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3039 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3040 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3041 TCGv_i64 cmp = tcg_temp_new_i64();
3042
3043 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3044 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3045
3046 /* VF = !NF & !CF. */
3047 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3048 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3049
3050 /* Both NF and VF actually look at bit 31. */
3051 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3052 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3053 return true;
3054 }
3055
/*
 * Translate the WHILE* scalar-compare predicate generators.
 *
 * The comparison of Rn against Rm is reduced to a TCG computation of an
 * element count: the operand difference (plus one for the "or equal"
 * forms), clamped to the number of elements in the vector and forced to
 * zero when the comparison itself is false.  The count is scaled to
 * predicate bits and passed to the sve_whilel helper (a->lt) or the
 * sve_whileg helper (!a->lt); the helper's 32-bit result is then given
 * to do_pred_flags.
 *
 * Returns false when the required feature is missing (aa64_sve for the
 * less-than forms, aa64_sve2 for the greater-than forms), true otherwise.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (a->lt
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For the 32-bit forms, widen both operands according to signedness. */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    /*
     * t0 = distance between the operands in the direction of iteration;
     * maxval = the op1 value for which the "+ 1" below cannot be trusted;
     * cond = the TCG comparison matching this instruction form.
     */
    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    /* Number of elements of this size in a full vector. */
    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    /* Descriptor: vsz / 8 as OPRSZ, plus the element size. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));

    /* t2 is both the count passed in and the helper's returned value. */
    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);
    return true;
}
3163
/*
 * Translate WHILERW (a->rw set) and WHILEWR (a->rw clear).
 *
 * A byte distance between the addresses in Rn and Rm is computed,
 * rounded down to a multiple of the element size and clamped to the
 * vector size in bytes; cases the original comments describe as "always
 * true" substitute the full vector size.  The result is passed to the
 * sve_whilel helper and the helper's 32-bit result to do_pred_flags.
 *
 * Returns false when SVE2 is not implemented, true otherwise.
 */
static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
{
    TCGv_i64 op0, op1, diff, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    /*
     * Mask that rounds down to a multiple of ESIZE.  Negating a positive
     * shift yields the same value as "-1 << esz" without left-shifting a
     * negative number, which ISO C leaves undefined.
     */
    int64_t esz_mask = -((int64_t)1 << a->esz);

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    tmax = tcg_constant_i64(vsz);
    diff = tcg_temp_new_i64();

    if (a->rw) {
        /* WHILERW */
        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
        t1 = tcg_temp_new_i64();
        tcg_gen_sub_i64(diff, op0, op1);
        tcg_gen_sub_i64(t1, op1, op0);
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, esz_mask);
        /* If op1 == op0, diff == 0, and the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
    } else {
        /* WHILEWR */
        tcg_gen_sub_i64(diff, op1, op0);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, esz_mask);
        /* If op0 >= op1, diff <= 0, the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(diff, diff, tmax);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, diff);

    /* Descriptor: vsz / 8 as OPRSZ, plus the element size. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));

    /* t2 is both the count passed in and the helper's returned value. */
    gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    do_pred_flags(t2);
    return true;
}
3222
3223 /*
3224 *** SVE Integer Wide Immediate - Unpredicated Group
3225 */
3226
trans_FDUP(DisasContext * s,arg_FDUP * a)3227 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3228 {
3229 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3230 return false;
3231 }
3232 if (sve_access_check(s)) {
3233 unsigned vsz = vec_full_reg_size(s);
3234 int dofs = vec_full_reg_offset(s, a->rd);
3235 uint64_t imm;
3236
3237 /* Decode the VFP immediate. */
3238 imm = vfp_expand_imm(a->esz, a->imm);
3239 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3240 }
3241 return true;
3242 }
3243
trans_DUP_i(DisasContext * s,arg_DUP_i * a)3244 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3245 {
3246 if (!dc_isar_feature(aa64_sve, s)) {
3247 return false;
3248 }
3249 if (sve_access_check(s)) {
3250 unsigned vsz = vec_full_reg_size(s);
3251 int dofs = vec_full_reg_offset(s, a->rd);
3252 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3253 }
3254 return true;
3255 }
3256
/* ADD (immediate): expanded by gen_gvec_fn_arg_zzi with tcg_gen_gvec_addi. */
TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

/*
 * SUB (immediate): negate the immediate in place in the decoded argument
 * structure and reuse the ADD expansion.  Note that *a is modified.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3264
trans_SUBR_zzi(DisasContext * s,arg_rri_esz * a)3265 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3266 {
3267 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3268 static const GVecGen2s op[4] = {
3269 { .fni8 = tcg_gen_vec_sub8_i64,
3270 .fniv = tcg_gen_sub_vec,
3271 .fno = gen_helper_sve_subri_b,
3272 .opt_opc = vecop_list,
3273 .vece = MO_8,
3274 .scalar_first = true },
3275 { .fni8 = tcg_gen_vec_sub16_i64,
3276 .fniv = tcg_gen_sub_vec,
3277 .fno = gen_helper_sve_subri_h,
3278 .opt_opc = vecop_list,
3279 .vece = MO_16,
3280 .scalar_first = true },
3281 { .fni4 = tcg_gen_sub_i32,
3282 .fniv = tcg_gen_sub_vec,
3283 .fno = gen_helper_sve_subri_s,
3284 .opt_opc = vecop_list,
3285 .vece = MO_32,
3286 .scalar_first = true },
3287 { .fni8 = tcg_gen_sub_i64,
3288 .fniv = tcg_gen_sub_vec,
3289 .fno = gen_helper_sve_subri_d,
3290 .opt_opc = vecop_list,
3291 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3292 .vece = MO_64,
3293 .scalar_first = true }
3294 };
3295
3296 if (!dc_isar_feature(aa64_sve, s)) {
3297 return false;
3298 }
3299 if (sve_access_check(s)) {
3300 unsigned vsz = vec_full_reg_size(s);
3301 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3302 vec_full_reg_offset(s, a->rn),
3303 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
3304 }
3305 return true;
3306 }
3307
/* MUL (immediate): expanded by gen_gvec_fn_arg_zzi with tcg_gen_gvec_muli. */
TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
3309
3310 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3311 {
3312 if (sve_access_check(s)) {
3313 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3314 tcg_constant_i64(a->imm), u, d);
3315 }
3316 return true;
3317 }
3318
/*
 * Saturating add/subtract immediate.  The trailing arguments are the
 * (u, d) flags handed to do_zzi_sat: u is true for the UQ* forms and
 * d is true for the *SUB forms.
 */
TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
3323
3324 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3325 {
3326 if (sve_access_check(s)) {
3327 unsigned vsz = vec_full_reg_size(s);
3328 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3329 vec_full_reg_offset(s, a->rn),
3330 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
3331 }
3332 return true;
3333 }
3334
/*
 * DO_ZZI(NAME, name): define the four-entry helper table name##i_fns
 * (_b, _h, _s, _d, indexed by a->esz) and register NAME##_zzi, expanded
 * through do_zzi_ool with the helper selected for the element size.
 */
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = {                \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3348
/*
 * Dot product helpers, indexed as dot_fns[a->u][a->sz]: row 0 holds the
 * sdot helpers and row 1 the udot helpers; the column selects the _b or
 * _h helper.  DOT_zzzz passes 0 as the helper data.
 */
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
3355
3356 /*
3357 * SVE Multiply - Indexed
3358 */
3359
/*
 * Indexed dot products, all expanded by gen_gvec_ool_arg_zzxz.  The _s
 * forms use the *_idx_b helpers and the _d forms the *_idx_h helpers.
 * SDOT/UDOT are registered with aa64_sve; the mixed-sign SUDOT/USDOT
 * forms are registered with aa64_sve_i8mm.
 */
TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)
3373
3374 #define DO_SVE2_RRX(NAME, FUNC) \
3375 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3376 a->rd, a->rn, a->rm, a->index)
3377
3378 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3379 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3380 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
3381
3382 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3383 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3384 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
3385
3386 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3387 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3388 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
3389
3390 #undef DO_SVE2_RRX
3391
3392 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3393 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3394 a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3395
3396 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3397 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3398 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3399 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3400
3401 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3402 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3403 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3404 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3405
3406 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3407 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3408 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3409 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
3410
3411 #undef DO_SVE2_RRX_TB
3412
/*
 * DO_SVE2_RRXR(NAME, FUNC): register an indexed multiply-accumulate
 * with feature aa64_sve2, expanded by gen_gvec_ool_arg_zzxz from the
 * decoded argument structure.
 */
#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
3433
/*
 * DO_SVE2_RRXR_TB(NAME, FUNC, TOP): register an indexed widening
 * multiply-accumulate with feature aa64_sve2, expanded by
 * gen_gvec_ool_zzzz.  The helper data packs TOP into bit 0 and a->index
 * into the bits above it; TOP is false for the *B forms and true for
 * the *T forms, which otherwise share a helper.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,        \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
3469
/*
 * DO_SVE2_RRXR_ROT(NAME, FUNC): register an indexed complex operation
 * with feature aa64_sve2, expanded by gen_gvec_ool_zzzz.  The helper
 * data is (a->index << 2) | a->rot, i.e. the rotation occupies the low
 * two bits with the index above it.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,        \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
3484
3485 /*
3486 *** SVE Floating Point Multiply-Add Indexed Group
3487 */
3488
3489 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
3490 {
3491 static gen_helper_gvec_4_ptr * const fns[4] = {
3492 NULL,
3493 gen_helper_gvec_fmla_idx_h,
3494 gen_helper_gvec_fmla_idx_s,
3495 gen_helper_gvec_fmla_idx_d,
3496 };
3497 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3498 (a->index << 1) | sub,
3499 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3500 }
3501
/* Indexed FMLA and FMLS share do_FMLA_zzxz; the last argument is "sub". */
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
3504
3505 /*
3506 *** SVE Floating Point Multiply Indexed Group
3507 */
3508
/*
 * Indexed FMUL helpers, indexed by a->esz; element size 0 has no helper.
 * a->index is passed as the helper data, and half precision selects the
 * FPST_FPCR_F16 status instead of FPST_FPCR.
 */
static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL, gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3516
3517 /*
3518 *** SVE Floating Point Fast Reduction Group
3519 */
3520
3521 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3522 TCGv_ptr, TCGv_i32);
3523
do_reduce(DisasContext * s,arg_rpr_esz * a,gen_helper_fp_reduce * fn)3524 static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
3525 gen_helper_fp_reduce *fn)
3526 {
3527 unsigned vsz, p2vsz;
3528 TCGv_i32 t_desc;
3529 TCGv_ptr t_zn, t_pg, status;
3530 TCGv_i64 temp;
3531
3532 if (fn == NULL) {
3533 return false;
3534 }
3535 if (!sve_access_check(s)) {
3536 return true;
3537 }
3538
3539 vsz = vec_full_reg_size(s);
3540 p2vsz = pow2ceil(vsz);
3541 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
3542 temp = tcg_temp_new_i64();
3543 t_zn = tcg_temp_new_ptr();
3544 t_pg = tcg_temp_new_ptr();
3545
3546 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
3547 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
3548 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3549
3550 fn(temp, t_zn, t_pg, status, t_desc);
3551
3552 write_fp_dreg(s, a->rd, temp);
3553 return true;
3554 }
3555
/*
 * DO_VPZ(NAME, name): define the helper table name##_fns indexed by
 * a->esz (NULL for size 0, which do_reduce rejects) and register NAME,
 * expanded through do_reduce.
 */
#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = {                \
        NULL,                      gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ
3570
3571 /*
3572 *** SVE Floating Point Unary Operations - Unpredicated Group
3573 */
3574
/*
 * FRECPE: helpers indexed by a->esz (no helper for size 0), expanded by
 * gen_gvec_fpst_arg_zz with 0 as the helper data.
 */
static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL, gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

/*
 * FRSQRTE: helpers indexed by a->esz (no helper for size 0), expanded by
 * gen_gvec_fpst_arg_zz with 0 as the helper data.
 */
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL, gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3586
3587 /*
3588 *** SVE Floating Point Compare with Zero Group
3589 */
3590
do_ppz_fp(DisasContext * s,arg_rpr_esz * a,gen_helper_gvec_3_ptr * fn)3591 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3592 gen_helper_gvec_3_ptr *fn)
3593 {
3594 if (fn == NULL) {
3595 return false;
3596 }
3597 if (sve_access_check(s)) {
3598 unsigned vsz = vec_full_reg_size(s);
3599 TCGv_ptr status =
3600 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3601
3602 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3603 vec_full_reg_offset(s, a->rn),
3604 pred_full_reg_offset(s, a->pg),
3605 status, vsz, vsz, 0, fn);
3606 }
3607 return true;
3608 }
3609
/*
 * DO_PPZ(NAME, name): define the helper table name##_fns indexed by
 * a->esz (NULL for size 0, which do_ppz_fp rejects) and register NAME,
 * expanded through do_ppz_fp.
 */
#define DO_PPZ(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[] = {         \
        NULL,                      gen_helper_sve_##name##_h,     \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3625
3626 /*
3627 *** SVE floating-point trig multiply-add coefficient
3628 */
3629
/*
 * FTMAD helpers, indexed by a->esz; element size 0 has no helper.
 * Registered through TRANS_FEAT_NONSTREAMING rather than TRANS_FEAT.
 * a->imm is passed as the helper data, and half precision selects the
 * FPST_FPCR_F16 status instead of FPST_FPCR.
 */
static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
                        ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
                        a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3637
3638 /*
3639 *** SVE Floating Point Accumulating Reduction Group
3640 */
3641
trans_FADDA(DisasContext * s,arg_rprr_esz * a)3642 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3643 {
3644 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3645 TCGv_ptr, TCGv_ptr, TCGv_i32);
3646 static fadda_fn * const fns[3] = {
3647 gen_helper_sve_fadda_h,
3648 gen_helper_sve_fadda_s,
3649 gen_helper_sve_fadda_d,
3650 };
3651 unsigned vsz = vec_full_reg_size(s);
3652 TCGv_ptr t_rm, t_pg, t_fpst;
3653 TCGv_i64 t_val;
3654 TCGv_i32 t_desc;
3655
3656 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
3657 return false;
3658 }
3659 s->is_nonstreaming = true;
3660 if (!sve_access_check(s)) {
3661 return true;
3662 }
3663
3664 t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3665 t_rm = tcg_temp_new_ptr();
3666 t_pg = tcg_temp_new_ptr();
3667 tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
3668 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
3669 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3670 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3671
3672 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3673
3674 write_fp_dreg(s, a->rd, t_val);
3675 return true;
3676 }
3677
3678 /*
3679 *** SVE Floating Point Arithmetic - Unpredicated Group
3680 */
3681
/*
 * DO_FP3(NAME, name): define the helper table name##_fns indexed by
 * a->esz (NULL for size 0) and register NAME, expanded through
 * gen_gvec_fpst_arg_zzz with 0 as the helper data.
 */
#define DO_FP3(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[4] = {          \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3696
/*
 * FTSMUL helpers, indexed by a->esz; element size 0 has no helper.
 * Registered through TRANS_FEAT_NONSTREAMING rather than TRANS_FEAT,
 * with 0 as the helper data.
 */
static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
    NULL, gen_helper_gvec_ftsmul_h,
    gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
};
TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
                        ftsmul_fns[a->esz], a, 0)
3703
3704 /*
3705 *** SVE Floating Point Arithmetic - Predicated Group
3706 */
3707
/*
 * DO_ZPZZ_FP(NAME, FEAT, name): define the helper table
 * name##_zpzz_fns indexed by a->esz (NULL for size 0) and register NAME
 * with feature FEAT, expanded through gen_gvec_fpst_arg_zpzz.
 */
#define DO_ZPZZ_FP(NAME, FEAT, name) \
    static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
        NULL, gen_helper_##name##_h,                            \
        gen_helper_##name##_s, gen_helper_##name##_d            \
    };                                                          \
    TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)

DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
3726
3727 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3728 TCGv_i64, TCGv_ptr, TCGv_i32);
3729
do_fp_scalar(DisasContext * s,int zd,int zn,int pg,bool is_fp16,TCGv_i64 scalar,gen_helper_sve_fp2scalar * fn)3730 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3731 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3732 {
3733 unsigned vsz = vec_full_reg_size(s);
3734 TCGv_ptr t_zd, t_zn, t_pg, status;
3735 TCGv_i32 desc;
3736
3737 t_zd = tcg_temp_new_ptr();
3738 t_zn = tcg_temp_new_ptr();
3739 t_pg = tcg_temp_new_ptr();
3740 tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));
3741 tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
3742 tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
3743
3744 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
3745 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3746 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3747 }
3748
do_fp_imm(DisasContext * s,arg_rpri_esz * a,uint64_t imm,gen_helper_sve_fp2scalar * fn)3749 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3750 gen_helper_sve_fp2scalar *fn)
3751 {
3752 if (fn == NULL) {
3753 return false;
3754 }
3755 if (sve_access_check(s)) {
3756 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3757 tcg_constant_i64(imm), fn);
3758 }
3759 return true;
3760 }
3761
/*
 * DO_FP_IMM(NAME, name, const0, const1): define, per element size,
 *   - name##_fns: the helper table indexed by a->esz (NULL for size 0,
 *     which do_fp_imm rejects), and
 *   - name##_const: the two constants selectable by a->imm, built from
 *     float16_/float32_/float64_ prefixed const0 and const1.  Row 0 is
 *     a placeholder that pairs with the NULL helper.
 * and register NAME##_zpzi, expanded through do_fp_imm.
 */
#define DO_FP_IMM(NAME, name, const0, const1)                           \
    static gen_helper_sve_fp2scalar * const name##_fns[4] = {           \
        NULL, gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                      \
        gen_helper_sve_##name##_d                                       \
    };                                                                  \
    static uint64_t const name##_const[4][2] = {                        \
        { -1, -1 },                                                     \
        { float16_##const0, float16_##const1 },                         \
        { float32_##const0, float32_##const1 },                         \
        { float64_##const0, float64_##const1 },                         \
    };                                                                  \
    TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a,                     \
               name##_const[a->esz][a->imm], name##_fns[a->esz])

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3787
3788 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3789 gen_helper_gvec_4_ptr *fn)
3790 {
3791 if (fn == NULL) {
3792 return false;
3793 }
3794 if (sve_access_check(s)) {
3795 unsigned vsz = vec_full_reg_size(s);
3796 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3797 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3798 vec_full_reg_offset(s, a->rn),
3799 vec_full_reg_offset(s, a->rm),
3800 pred_full_reg_offset(s, a->pg),
3801 status, vsz, vsz, 0, fn);
3802 }
3803 return true;
3804 }
3805
/*
 * DO_FPCMP(NAME, name): define the helper table name##_fns indexed by
 * a->esz (NULL for size 0, which do_fp_cmp rejects) and register
 * NAME##_ppzz, expanded through do_fp_cmp.
 */
#define DO_FPCMP(NAME, name) \
    static gen_helper_gvec_4_ptr * const name##_fns[4] = {            \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3822
/*
 * FCADD helpers, indexed by a->esz; element size 0 has no helper.
 * a->rot is passed as the helper data, and half precision selects the
 * FPST_FPCR_F16 status instead of FPST_FPCR.
 */
static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
    NULL, gen_helper_sve_fcadd_h,
    gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
           a->rd, a->rn, a->rm, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3830
/*
 * DO_FMLA(NAME, name): define the helper table name##_fns indexed by
 * a->esz (NULL for size 0) and register NAME, expanded through
 * gen_gvec_fpst_zzzzp with 0 as the helper data.  Half precision
 * selects the FPST_FPCR_F16 status instead of FPST_FPCR.
 */
#define DO_FMLA(NAME, name) \
    static gen_helper_gvec_5_ptr * const name##_fns[4] = {              \
        NULL, gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d            \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
               a->rd, a->rn, a->rm, a->ra, a->pg, 0,                    \
               a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
3846
/*
 * Predicated FCMLA helpers, indexed by a->esz; element size 0 has no
 * helper.  a->rot is passed as the helper data.
 */
static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
    NULL, gen_helper_sve_fcmla_zpzzz_h,
    gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/*
 * Indexed FCMLA helpers, indexed by a->esz; only sizes 1 and 2 have a
 * helper.  The helper data is a->index * 4 + a->rot, i.e. the rotation
 * in the low two bits with the index above it.
 */
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
    NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
};
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
           a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3861
3862 /*
3863 *** SVE Floating Point Unary Operations Predicated Group
3864 */
3865
/*
 * Floating-point to floating-point conversions.  Every variant in this
 * group, including those with an 'h' in the suffix, uses FPST_FPCR.
 * Each expansion is gated on aa64_sve, except BFCVT which is gated on
 * aa64_sve_bf16.
 */
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)

/*
 * FCVTZS / FCVTZU.  The _hh, _hs and _hd variants use FPST_FPCR_F16;
 * all remaining variants below use FPST_FPCR.
 */
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)

TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
3913
/*
 * Round-to-integral helpers indexed by a->esz (slot 0 is NULL).
 * FRINTI uses them directly with the float status flavour chosen from
 * the element size; the same table is also passed to do_frint_mode()
 * by the FRINTN/FRINTP/FRINTM/FRINTZ/FRINTA expansions.
 */
static gen_helper_gvec_3_ptr * const frint_fns[] = {
    NULL,
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3922
3923 static gen_helper_gvec_3_ptr * const frintx_fns[] = {
3924 NULL,
3925 gen_helper_sve_frintx_h,
3926 gen_helper_sve_frintx_s,
3927 gen_helper_sve_frintx_d
3928 };
3929 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
3930 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3931
do_frint_mode(DisasContext * s,arg_rpr_esz * a,ARMFPRounding mode,gen_helper_gvec_3_ptr * fn)3932 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
3933 ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
3934 {
3935 unsigned vsz;
3936 TCGv_i32 tmode;
3937 TCGv_ptr status;
3938
3939 if (fn == NULL) {
3940 return false;
3941 }
3942 if (!sve_access_check(s)) {
3943 return true;
3944 }
3945
3946 vsz = vec_full_reg_size(s);
3947 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3948 tmode = gen_set_rmode(mode, status);
3949
3950 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3951 vec_full_reg_offset(s, a->rn),
3952 pred_full_reg_offset(s, a->pg),
3953 status, vsz, vsz, 0, fn);
3954
3955 gen_restore_rmode(tmode, status);
3956 return true;
3957 }
3958
/*
 * Round-to-integral with a fixed rounding mode.  Each variant passes
 * its own FPROUNDING_* constant to do_frint_mode() together with the
 * frint_fns helper selected by a->esz.
 */
TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEEVEN, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           FPROUNDING_POSINF, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           FPROUNDING_NEGINF, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           FPROUNDING_ZERO, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           FPROUNDING_TIEAWAY, frint_fns[a->esz])
3969
/* FRECPX: helpers indexed by a->esz; slot 0 is NULL. */
static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL,                    gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

/* FSQRT: helpers indexed by a->esz; slot 0 is NULL. */
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL,                   gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3983
/*
 * SCVTF.  The _hh, _sh and _dh variants use FPST_FPCR_F16; the
 * remaining variants use FPST_FPCR.
 */
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)

/*
 * UCVTF.  Same float status selection as SCVTF: the _hh, _sh and _dh
 * variants use FPST_FPCR_F16, the others FPST_FPCR.
 */
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
4017
4018 /*
4019 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4020 */
4021
/*
 * Subroutine loading a vector or predicate register image of LEN bytes.
 *
 * @base: host pointer that @vofs is relative to (the callers in this
 *        file pass tcg_env)
 * @vofs: byte offset from @base of the register image to fill
 * @len:  number of bytes to load
 * @rn:   base register number, read with cpu_reg_sp()
 * @imm:  byte offset added to the base register
 *
 * The load begins at the address Rn + IMM.  The data is moved in
 * 16-byte pieces, followed by an optional 8-byte piece and an optional
 * 2-, 4- or 6-byte tail.
 */

void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    /* Bytes covered by whole 16-byte pieces, and what is left over. */
    int len_align = QEMU_ALIGN_DOWN(len, 16);
    int len_remain = len % 16;
    /* Piece count used only to choose between unrolling and a loop. */
    int nparts = len / 16 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;
    TCGv_i128 t16;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    /* All guest accesses below use the address returned by this check. */
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        t0 = tcg_temp_new_i64();
        t1 = tcg_temp_new_i64();
        t16 = tcg_temp_new_i128();

        /* Fully unrolled at translation time: one 16-byte load per step. */
        for (i = 0; i < len_align; i += 16) {
            tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
                                 MO_LE | MO_128 | MO_ATOM_NONE);
            tcg_gen_extr_i128_i64(t0, t1, t16);
            tcg_gen_st_i64(t0, base, vofs + i);
            tcg_gen_st_i64(t1, base, vofs + i + 8);
            tcg_gen_addi_i64(clean_addr, clean_addr, 16);
        }
    } else {
        /*
         * Emit a run-time loop instead.  The branch is at the bottom, so
         * the body runs at least once; that is fine because nparts > 4
         * with ctpop8(len_remain) <= 4 implies len >= 16.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_temp_new_ptr();

        tcg_gen_movi_ptr(i, 0);
        gen_set_label(loop);

        t16 = tcg_temp_new_i128();
        tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
                             MO_LE | MO_128 | MO_ATOM_NONE);
        tcg_gen_addi_i64(clean_addr, clean_addr, 16);

        /* tp = base + i is formed before i is advanced. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_addi_ptr(i, i, 16);

        t0 = tcg_temp_new_i64();
        t1 = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(t0, t1, t16);

        tcg_gen_st_i64(t0, tp, vofs);
        tcg_gen_st_i64(t1, tp, vofs + 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into tcg_env.
     */
    if (len_remain >= 8) {
        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
        tcg_gen_st_i64(t0, base, vofs + len_align);
        len_remain -= 8;
        len_align += 8;
        /* Only advance the guest address if a tail remains to be loaded. */
        if (len_remain) {
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
    }
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        /*
         * len_remain is below 8 at this point, so the "case 8" label
         * cannot be reached; values other than 2, 4 or 6 hit the assert.
         */
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32() of a power-of-two byte count gives the size log2. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
            break;

        case 6:
            /* 4 bytes into bits [31:0], then 2 bytes deposited at bit 32. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, base, vofs + len_align);
    }
}
4126
/*
 * Similarly for stores: write LEN bytes of the register image found at
 * BASE + VOFS to guest memory starting at the address Rn + IMM.
 *
 * The data is moved in 16-byte pieces, followed by an optional 8-byte
 * piece and an optional 2-, 4- or 6-byte tail.
 */
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    /* Bytes covered by whole 16-byte pieces, and what is left over. */
    int len_align = QEMU_ALIGN_DOWN(len, 16);
    int len_remain = len % 16;
    /* Piece count used only to choose between unrolling and a loop. */
    int nparts = len / 16 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;
    TCGv_i128 t16;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    /*
     * All guest accesses below use the address returned by this check.
     * NOTE(review): the third argument is false here exactly as in
     * gen_sve_ldr(); if it denotes "is write", confirm that false is
     * intended for a store.
     */
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        t0 = tcg_temp_new_i64();
        t1 = tcg_temp_new_i64();
        t16 = tcg_temp_new_i128();
        /* Fully unrolled at translation time: one 16-byte store per step. */
        for (i = 0; i < len_align; i += 16) {
            tcg_gen_ld_i64(t0, base, vofs + i);
            tcg_gen_ld_i64(t1, base, vofs + i + 8);
            tcg_gen_concat_i64_i128(t16, t0, t1);
            tcg_gen_qemu_st_i128(t16, clean_addr, midx,
                                 MO_LE | MO_128 | MO_ATOM_NONE);
            tcg_gen_addi_i64(clean_addr, clean_addr, 16);
        }
    } else {
        /*
         * Emit a run-time loop instead.  The branch is at the bottom, so
         * the body runs at least once; that is fine because nparts > 4
         * with ctpop8(len_remain) <= 4 implies len >= 16.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_temp_new_ptr();

        tcg_gen_movi_ptr(i, 0);
        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        t1 = tcg_temp_new_i64();
        /* tp = base + i is formed before i is advanced. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_ld_i64(t1, tp, vofs + 8);
        tcg_gen_addi_ptr(i, i, 16);

        t16 = tcg_temp_new_i128();
        tcg_gen_concat_i64_i128(t16, t0, t1);

        tcg_gen_qemu_st_i128(t16, clean_addr, midx,
                             MO_LE | MO_128 | MO_ATOM_NONE);
        tcg_gen_addi_i64(clean_addr, clean_addr, 16);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain >= 8) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, base, vofs + len_align);
        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
        len_remain -= 8;
        len_align += 8;
        /* Only advance the guest address if a tail remains to be stored. */
        if (len_remain) {
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
    }
    if (len_remain) {
        /* The whole 64-bit unit is read; only the low bytes are stored. */
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, base, vofs + len_align);

        /*
         * len_remain is below 8 at this point, so the "case 8" label
         * cannot be reached; values other than 2, 4 or 6 hit the assert.
         */
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32() of a power-of-two byte count gives the size log2. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
            break;

        case 6:
            /* Low 4 bytes first, then the next 2 bytes after a shift. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
            break;

        default:
            g_assert_not_reached();
        }
    }
}
4224
trans_LDR_zri(DisasContext * s,arg_rri * a)4225 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4226 {
4227 if (!dc_isar_feature(aa64_sve, s)) {
4228 return false;
4229 }
4230 if (sve_access_check(s)) {
4231 int size = vec_full_reg_size(s);
4232 int off = vec_full_reg_offset(s, a->rd);
4233 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
4234 }
4235 return true;
4236 }
4237
trans_LDR_pri(DisasContext * s,arg_rri * a)4238 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4239 {
4240 if (!dc_isar_feature(aa64_sve, s)) {
4241 return false;
4242 }
4243 if (sve_access_check(s)) {
4244 int size = pred_full_reg_size(s);
4245 int off = pred_full_reg_offset(s, a->rd);
4246 gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
4247 }
4248 return true;
4249 }
4250
trans_STR_zri(DisasContext * s,arg_rri * a)4251 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4252 {
4253 if (!dc_isar_feature(aa64_sve, s)) {
4254 return false;
4255 }
4256 if (sve_access_check(s)) {
4257 int size = vec_full_reg_size(s);
4258 int off = vec_full_reg_offset(s, a->rd);
4259 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
4260 }
4261 return true;
4262 }
4263
trans_STR_pri(DisasContext * s,arg_rri * a)4264 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4265 {
4266 if (!dc_isar_feature(aa64_sve, s)) {
4267 return false;
4268 }
4269 if (sve_access_check(s)) {
4270 int size = pred_full_reg_size(s);
4271 int off = pred_full_reg_offset(s, a->rd);
4272 gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
4273 }
4274 return true;
4275 }
4276
4277 /*
4278 *** SVE Memory - Contiguous Load Group
4279 */
4280
/*
 * The memory mode of the dtype: a MemOp (size and signedness of the
 * value in memory) for each of the 16 dtype encodings.  The entry
 * order matches the dtype index of the ldr_fns helper table.
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Only the size bits of the memory mode. */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/*
 * The vector element size of dtype, used as a shift count (e.g. the
 * number of elements in a vector is computed as vsz >> dtype_esz[]).
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4298
/*
 * Build the 32-bit descriptor passed to the SVE memory helpers.
 *
 * @vsz is used for both size arguments of simd_desc().  @data is placed
 * in the low bits of the descriptor's data field.  When MTE is active,
 * an MTEDESC word (mem index, TBI, TCMA, write flag and access size
 * minus one, where the size is @nregs << @msz) is placed above @data,
 * shifted by SVE_MTEDESC_SHIFT; otherwise those bits are zero.
 *
 * The range assertions are made whether or not MTE is active.
 */
uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
                          uint32_t msz, bool is_write, uint32_t data)
{
    uint32_t access_sizem1;
    uint32_t mte;

    /* Check that every field fits, independent of the MTE state. */
    assert(nregs >= 1 && nregs <= 4);
    access_sizem1 = (nregs << msz) - 1;
    assert(access_sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
    assert(data < 1u << SVE_MTEDESC_SHIFT);

    if (!s->mte_active[0]) {
        return simd_desc(vsz, vsz, data);
    }

    mte = 0;
    mte = FIELD_DP32(mte, MTEDESC, MIDX, get_mem_index(s));
    mte = FIELD_DP32(mte, MTEDESC, TBI, s->tbid);
    mte = FIELD_DP32(mte, MTEDESC, TCMA, s->tcma);
    mte = FIELD_DP32(mte, MTEDESC, WRITE, is_write);
    mte = FIELD_DP32(mte, MTEDESC, SIZEM1, access_sizem1);
    mte <<= SVE_MTEDESC_SHIFT;

    return simd_desc(vsz, vsz, mte | data);
}
4321
/*
 * Emit a call to a predicated contiguous load/store helper.
 *
 * @zt:    first vector register number, carried in the descriptor
 * @pg:    governing predicate register number
 * @addr:  guest address; passed through clean_data_tbi() first when
 *         MTE is not active
 * @dtype: selects the memory element size for the descriptor
 * @nregs: number of registers transferred (1..4)
 * @fn:    helper to call
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t nregs, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    TCGv_i64 ea = s->mte_active[0] ? addr : clean_data_tbi(s, addr);
    /*
     * LD4 and friends have too few helper arguments to pass every
     * register as a pointer, so the register number rides in the
     * descriptor's data field.  LD1 does the same for uniformity.
     */
    uint32_t desc = make_svemte_desc(s, vec_full_reg_size(s), nregs,
                                     dtype_msz(dtype), is_write, zt);
    TCGv_ptr pg_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pg_ptr, tcg_env, pred_full_reg_offset(s, pg));
    fn(tcg_env, pg_ptr, ea, tcg_constant_i32(desc));
}
4345
/*
 * Contiguous load helpers.
 *
 * Indexed by [mte][be][dtype][nreg]:
 *   mte   - s->mte_active[0]
 *   be    - s->be_data == MO_BE
 *   dtype - the 4-bit dtype encoding (see dtype_mop / dtype_esz)
 *   nreg  - 0..3, selecting the ld1..ld4 form
 *
 * Only the dtypes whose memory and element sizes match (bb, hh, ss, dd)
 * have ld2..ld4 entries; every other row has NULL in slots 1..3.
 * Byte-sized memory accesses use the same helper in the little-endian
 * and big-endian halves.
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
4464
/*
 * Emit a predicated contiguous load of @nreg + 1 registers starting at
 * vector register @zt, using the ldr_fns helper that matches the
 * current MTE state, data endianness, @dtype and @nreg.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    int mte_idx = s->mte_active[0];
    int be_idx = s->be_data == MO_BE;
    gen_helper_gvec_mem *helper = ldr_fns[mte_idx][be_idx][dtype][nreg];

    /*
     * The table does contain empty slots, but no instruction
     * encoding can select one of them.
     */
    assert(helper != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, helper);
}
4478
trans_LD_zprr(DisasContext * s,arg_rprr_load * a)4479 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4480 {
4481 if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
4482 return false;
4483 }
4484 if (sve_access_check(s)) {
4485 TCGv_i64 addr = tcg_temp_new_i64();
4486 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4487 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4488 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4489 }
4490 return true;
4491 }
4492
trans_LD_zpri(DisasContext * s,arg_rpri_load * a)4493 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4494 {
4495 if (!dc_isar_feature(aa64_sve, s)) {
4496 return false;
4497 }
4498 if (sve_access_check(s)) {
4499 int vsz = vec_full_reg_size(s);
4500 int elements = vsz >> dtype_esz[a->dtype];
4501 TCGv_i64 addr = tcg_temp_new_i64();
4502
4503 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4504 (a->imm * elements * (a->nreg + 1))
4505 << dtype_msz(a->dtype));
4506 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4507 }
4508 return true;
4509 }
4510
/*
 * LDFF1 (scalar plus scalar).
 *
 * The helper is selected from a local table indexed by
 * [mte][be][dtype], using the same index meanings as ldr_fns but with
 * a single register only.  The address is Rn + (Rm << memory element
 * size); Rm is read with cpu_reg() and, unlike trans_LD_zprr(), Rm == 31
 * is not rejected here.
 *
 * Returns false when SVE is not implemented; otherwise true, emitting
 * code only if the SVE access check passes.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /*
     * Set before the access check.
     * NOTE(review): presumably this lets the check reject the
     * instruction in streaming mode; confirm in sve_access_check().
     */
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = tcg_temp_new_i64();
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4612
/*
 * LDNF1 (scalar plus immediate).
 *
 * The helper is selected from a local table indexed by
 * [mte][be][dtype], using the same index meanings as ldr_fns but with
 * a single register only.  The immediate is scaled by the number of
 * elements per vector and the memory element size.
 *
 * Returns false when SVE is not implemented; otherwise true, emitting
 * code only if the SVE access check passes.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /*
     * Set before the access check.
     * NOTE(review): presumably this lets the check reject the
     * instruction in streaming mode; confirm in sve_access_check().
     */
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* Byte offset: imm vectors' worth of memory elements. */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = tcg_temp_new_i64();

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4717
/*
 * Load one quadword under predicate @pg into the low 16 bytes of
 * vector register @zt, then copy it across the rest of the register.
 *
 * The load itself reuses the single-register ldr_fns helper with a
 * descriptor built for a 16-byte vector.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    const unsigned vl_bytes = vec_full_reg_size(s);
    const bool wider_than_quad = vl_bytes > 16;
    gen_helper_gvec_mem *ld1_fn;
    TCGv_ptr pg_ptr;
    uint32_t desc;
    int pg_off;

    /* The first quadword goes through the ordinary predicated load. */
    if (!s->mte_active[0]) {
        addr = clean_data_tbi(s, addr);
    }

    pg_off = pred_full_reg_offset(s, pg);
    if (wider_than_quad) {
        /*
         * The helper runs as if VQ were 1 and asserts that no predicate
         * bits are set beyond that.  Hand it a scratch predicate that
         * holds only the low 16 bits of the real one, zero-extended.
         */
        TCGv_i64 pg_low = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        pg_off += 6;
#endif
        tcg_gen_ld16u_i64(pg_low, tcg_env, pg_off);

        pg_off = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(pg_low, tcg_env, pg_off);
    }

    pg_ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(pg_ptr, tcg_env, pg_off);

    ld1_fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt);
    ld1_fn(tcg_env, pg_ptr, addr, tcg_constant_i32(desc));

    /* Copy the quadword just loaded into the remainder of the vector. */
    if (wider_than_quad) {
        int zreg_off = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, zreg_off + 16, zreg_off,
                             vl_bytes - 16, vl_bytes - 16);
    }
}
4762
/*
 * LD1RQ (scalar plus scalar): the address is Xn|SP + (Xm << msz).
 * Returns false (not decoded) when rm is 31 or SVE is absent.
 */
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    TCGv_i64 addr;
    int shift;

    if (a->rm == 31) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        /* The access check has already dealt with the instruction. */
        return true;
    }

    shift = dtype_msz(a->dtype);
    addr = tcg_temp_new_i64();
    tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), shift);
    tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
    do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    return true;
}
4777
/*
 * LD1RQ (scalar plus immediate): the address is Xn|SP + imm * 16.
 * Returns false (not decoded) when SVE is absent.
 */
static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
    do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    return true;
}
4790
/*
 * Emit code for a load-and-replicate-octaword: load 32 bytes at ADDR
 * under predicate PG into ZT using the contiguous predicated load
 * helper selected from ldr_fns[], then copy that octaword over the
 * rest of the vector register in 32-byte units, zeroing any tail.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vec_bytes = vec_full_reg_size(s);
    unsigned repl_bytes, tail_bytes;
    int pred_off, dst_off;
    TCGv_ptr pred_ptr;
    gen_helper_gvec_mem *load_fn;
    uint32_t desc;

    if (vec_bytes < 32) {
        /*
         * In the ARM pseudocode this UNDEFINED check follows
         * CheckSVEEnabled(), i.e. the sve_access_check() our caller has
         * already performed.  The caller must therefore not return
         * false at this point; raise the exception from here.
         */
        unallocated_encoding(s);
        return;
    }

    /* The first octaword goes through the normal predicated load helper. */
    if (!s->mte_active[0]) {
        addr = clean_data_tbi(s, addr);
    }

    pred_off = pred_full_reg_offset(s, pg);
    if (vec_bytes > 32) {
        /*
         * The helper is invoked as though VQ were 2, and it asserts that
         * no predicate bits are set beyond VQ.  Copy the first 32
         * predicate bits, zero-extended, into the scratch predicate and
         * pass that to the helper instead.
         */
        TCGv_i64 pred_bits = tcg_temp_new_i64();

#if HOST_BIG_ENDIAN
        pred_off += 4;
#endif
        tcg_gen_ld32u_i64(pred_bits, tcg_env, pred_off);

        pred_off = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(pred_bits, tcg_env, pred_off);
    }

    pred_ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(pred_ptr, tcg_env, pred_off);

    load_fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt);
    load_fn(tcg_env, pred_ptr, addr, tcg_constant_i32(desc));

    /*
     * Replicate the loaded octaword.  Replication works in units of 32
     * bytes; when the vector size is not a multiple of 32 the bytes
     * left over at the end are cleared.
     */
    dst_off = vec_full_reg_offset(s, zt);
    repl_bytes = QEMU_ALIGN_DOWN(vec_bytes, 32);
    tail_bytes = vec_bytes - repl_bytes;

    if (vec_bytes >= 64) {
        tcg_gen_gvec_dup_mem(5, dst_off + 32, dst_off,
                             repl_bytes - 32, repl_bytes - 32);
    }
    if (tail_bytes != 0) {
        tcg_gen_gvec_dup_imm(MO_64, dst_off + repl_bytes,
                             tail_bytes, tail_bytes, 0);
    }
}
4855
/*
 * LD1RO (scalar plus scalar): the address is Xn|SP + (Xm << msz).
 * Requires the aa64_sve_f64mm feature; rm == 31 is not decoded.
 * The instruction is flagged as non-streaming before the access check.
 */
static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
{
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve_f64mm, s) || a->rm == 31) {
        return false;
    }

    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    addr = tcg_temp_new_i64();
    tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
    tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
    do_ldro(s, a->rd, a->pg, addr, a->dtype);
    return true;
}
4873
/*
 * LD1RO (scalar plus immediate): the address is Xn|SP + imm * 32.
 * Requires the aa64_sve_f64mm feature.  The instruction is flagged as
 * non-streaming before the access check.
 */
static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
{
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }

    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
    do_ldro(s, a->rd, a->pg, addr, a->dtype);
    return true;
}
4887
4888 /* Load and broadcast element. */
trans_LD1R_zpri(DisasContext * s,arg_rpri_load * a)4889 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4890 {
4891 unsigned vsz = vec_full_reg_size(s);
4892 unsigned psz = pred_full_reg_size(s);
4893 unsigned esz = dtype_esz[a->dtype];
4894 unsigned msz = dtype_msz(a->dtype);
4895 TCGLabel *over;
4896 TCGv_i64 temp, clean_addr;
4897 MemOp memop;
4898
4899 if (!dc_isar_feature(aa64_sve, s)) {
4900 return false;
4901 }
4902 if (!sve_access_check(s)) {
4903 return true;
4904 }
4905
4906 over = gen_new_label();
4907
4908 /* If the guarding predicate has no bits set, no load occurs. */
4909 if (psz <= 8) {
4910 /* Reduce the pred_esz_masks value simply to reduce the
4911 * size of the code generated here.
4912 */
4913 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4914 temp = tcg_temp_new_i64();
4915 tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg));
4916 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4917 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4918 } else {
4919 TCGv_i32 t32 = tcg_temp_new_i32();
4920 find_last_active(s, t32, esz, a->pg);
4921 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4922 }
4923
4924 /* Load the data. */
4925 temp = tcg_temp_new_i64();
4926 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4927
4928 memop = finalize_memop(s, dtype_mop[a->dtype]);
4929 clean_addr = gen_mte_check1(s, temp, false, true, memop);
4930 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop);
4931
4932 /* Broadcast to *all* elements. */
4933 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4934 vsz, vsz, temp);
4935
4936 /* Zero the inactive elements. */
4937 gen_set_label(over);
4938 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
4939 }
4940
/*
 * Emit a contiguous predicated store of NREG + 1 registers starting at
 * ZT, governed by PG, to ADDR.  MSZ is the memory element size and ESZ
 * the vector element size (both log2 bytes).  The helper is picked by
 * MTE state and data endianness and handed to do_mem_zpa().
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1: indexed by [mte][be][msz][esz]; NULL where msz > esz. */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { /* MTE inactive */
            { /* little-endian */
                { gen_helper_sve_st1bb_r,
                  gen_helper_sve_st1bh_r,
                  gen_helper_sve_st1bs_r,
                  gen_helper_sve_st1bd_r },
                { NULL,
                  gen_helper_sve_st1hh_le_r,
                  gen_helper_sve_st1hs_le_r,
                  gen_helper_sve_st1hd_le_r },
                { NULL, NULL,
                  gen_helper_sve_st1ss_le_r,
                  gen_helper_sve_st1sd_le_r },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_le_r },
            },
            { /* big-endian */
                { gen_helper_sve_st1bb_r,
                  gen_helper_sve_st1bh_r,
                  gen_helper_sve_st1bs_r,
                  gen_helper_sve_st1bd_r },
                { NULL,
                  gen_helper_sve_st1hh_be_r,
                  gen_helper_sve_st1hs_be_r,
                  gen_helper_sve_st1hd_be_r },
                { NULL, NULL,
                  gen_helper_sve_st1ss_be_r,
                  gen_helper_sve_st1sd_be_r },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_be_r },
            },
        },
        { /* MTE active */
            { /* little-endian */
                { gen_helper_sve_st1bb_r_mte,
                  gen_helper_sve_st1bh_r_mte,
                  gen_helper_sve_st1bs_r_mte,
                  gen_helper_sve_st1bd_r_mte },
                { NULL,
                  gen_helper_sve_st1hh_le_r_mte,
                  gen_helper_sve_st1hs_le_r_mte,
                  gen_helper_sve_st1hd_le_r_mte },
                { NULL, NULL,
                  gen_helper_sve_st1ss_le_r_mte,
                  gen_helper_sve_st1sd_le_r_mte },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_le_r_mte },
            },
            { /* big-endian */
                { gen_helper_sve_st1bb_r_mte,
                  gen_helper_sve_st1bh_r_mte,
                  gen_helper_sve_st1bs_r_mte,
                  gen_helper_sve_st1bd_r_mte },
                { NULL,
                  gen_helper_sve_st1hh_be_r_mte,
                  gen_helper_sve_st1hs_be_r_mte,
                  gen_helper_sve_st1hd_be_r_mte },
                { NULL, NULL,
                  gen_helper_sve_st1ss_be_r_mte,
                  gen_helper_sve_st1sd_be_r_mte },
                { NULL, NULL, NULL,
                  gen_helper_sve_st1dd_be_r_mte },
            },
        },
    };
    /* ST2/ST3/ST4: indexed by [mte][be][nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { /* MTE inactive */
            { /* little-endian */
                { gen_helper_sve_st2bb_r,
                  gen_helper_sve_st2hh_le_r,
                  gen_helper_sve_st2ss_le_r,
                  gen_helper_sve_st2dd_le_r },
                { gen_helper_sve_st3bb_r,
                  gen_helper_sve_st3hh_le_r,
                  gen_helper_sve_st3ss_le_r,
                  gen_helper_sve_st3dd_le_r },
                { gen_helper_sve_st4bb_r,
                  gen_helper_sve_st4hh_le_r,
                  gen_helper_sve_st4ss_le_r,
                  gen_helper_sve_st4dd_le_r },
            },
            { /* big-endian */
                { gen_helper_sve_st2bb_r,
                  gen_helper_sve_st2hh_be_r,
                  gen_helper_sve_st2ss_be_r,
                  gen_helper_sve_st2dd_be_r },
                { gen_helper_sve_st3bb_r,
                  gen_helper_sve_st3hh_be_r,
                  gen_helper_sve_st3ss_be_r,
                  gen_helper_sve_st3dd_be_r },
                { gen_helper_sve_st4bb_r,
                  gen_helper_sve_st4hh_be_r,
                  gen_helper_sve_st4ss_be_r,
                  gen_helper_sve_st4dd_be_r },
            },
        },
        { /* MTE active */
            { /* little-endian */
                { gen_helper_sve_st2bb_r_mte,
                  gen_helper_sve_st2hh_le_r_mte,
                  gen_helper_sve_st2ss_le_r_mte,
                  gen_helper_sve_st2dd_le_r_mte },
                { gen_helper_sve_st3bb_r_mte,
                  gen_helper_sve_st3hh_le_r_mte,
                  gen_helper_sve_st3ss_le_r_mte,
                  gen_helper_sve_st3dd_le_r_mte },
                { gen_helper_sve_st4bb_r_mte,
                  gen_helper_sve_st4hh_le_r_mte,
                  gen_helper_sve_st4ss_le_r_mte,
                  gen_helper_sve_st4dd_le_r_mte },
            },
            { /* big-endian */
                { gen_helper_sve_st2bb_r_mte,
                  gen_helper_sve_st2hh_be_r_mte,
                  gen_helper_sve_st2ss_be_r_mte,
                  gen_helper_sve_st2dd_be_r_mte },
                { gen_helper_sve_st3bb_r_mte,
                  gen_helper_sve_st3hh_be_r_mte,
                  gen_helper_sve_st3ss_be_r_mte,
                  gen_helper_sve_st3dd_be_r_mte },
                { gen_helper_sve_st4bb_r_mte,
                  gen_helper_sve_st4hh_be_r_mte,
                  gen_helper_sve_st4ss_be_r_mte,
                  gen_helper_sve_st4dd_be_r_mte },
            },
        },
    };
    int big_endian = s->be_data == MO_BE;
    gen_helper_gvec_mem *store_fn;

    if (nreg != 0) {
        /* ST2, ST3, ST4 -- the encoding enforces msz == esz. */
        assert(msz == esz);
        store_fn = fn_multiple[s->mte_active[0]][big_endian][nreg - 1][msz];
    } else {
        /* ST1 */
        store_fn = fn_single[s->mte_active[0]][big_endian][msz][esz];
    }
    assert(store_fn != NULL);

    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, store_fn);
}
5063
/*
 * Contiguous store (scalar plus scalar): the address is
 * Xn|SP + (Xm << msz).  Not decoded when SVE is absent, when rm is 31,
 * or when the memory element is wider than the vector element.
 */
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (a->rm == 31) {
        return false;
    }
    if (a->msz > a->esz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    addr = tcg_temp_new_i64();
    tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
    tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
    do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    return true;
}
5080
/*
 * Contiguous store (scalar plus immediate).  The immediate counts
 * whole register groups: the byte offset from Xn|SP is
 * (imm * elements-per-vector * (nreg + 1)) << msz.
 * Not decoded when SVE is absent or when msz > esz.
 */
static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    int vec_bytes, elts_per_vec;
    TCGv_i64 addr;

    if (!dc_isar_feature(aa64_sve, s) || a->msz > a->esz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vec_bytes = vec_full_reg_size(s);
    elts_per_vec = vec_bytes >> a->esz;
    addr = tcg_temp_new_i64();

    tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                     (a->imm * elts_per_vec * (a->nreg + 1)) << a->msz);
    do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    return true;
}
5100
5101 /*
5102 *** SVE gather loads / scatter stores
5103 */
5104
/*
 * Common code for gather loads and scatter stores: call helper FN with
 * pointers to the data register ZT, the governing predicate PG and the
 * vector operand ZM, plus the scalar operand SCALAR.  SCALE is carried
 * in the descriptor's data field; MSZ and IS_WRITE are passed to
 * make_svemte_desc() as well.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    TCGv_ptr vec_ptr = tcg_temp_new_ptr();
    TCGv_ptr pred_ptr = tcg_temp_new_ptr();
    TCGv_ptr data_ptr = tcg_temp_new_ptr();
    uint32_t desc;

    tcg_gen_addi_ptr(pred_ptr, tcg_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(vec_ptr, tcg_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(data_ptr, tcg_env, vec_full_reg_offset(s, zt));

    desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz,
                            is_write, scale);
    fn(tcg_env, data_ptr, pred_ptr, vec_ptr, scalar, tcg_constant_i32(desc));
}
5121
/*
 * Gather-load helpers for 32-bit vector elements.
 *
 * Indexed by [mte][be][ff][xs][u][msz].
 *
 * As used by the trans_LD1_zprz/trans_LD1_zpiz/trans_LDNT1_zprz
 * functions in this file:
 *   mte - s->mte_active[0]
 *   be  - s->be_data == MO_BE
 *   ff  - a->ff; the second group holds the first-fault (ldff) helpers
 *   xs  - a->xs; the immediate and LDNT1 forms always use 0
 *   u   - a->u
 *   msz - a->msz
 * NULL marks a combination with no helper; callers assert that the
 * selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
5238
/*
 * Gather-load helpers for 64-bit vector elements.
 *
 * Indexed by [mte][be][ff][xs][u][msz], matching the lookups in the
 * trans_LD1_zprz/trans_LD1_zpiz/trans_LDNT1_zprz functions in this file.
 * Note that we overload xs=2 to indicate 64-bit offset; the immediate
 * and LDNT1 forms always select xs=2.
 * NULL marks a combination with no helper; callers assert that the
 * selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            /* Not first-fault */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
5449
/*
 * Gather load, scalar base plus vector offsets.
 *
 * The helper comes from gather_load_fn32/64 according to the vector
 * element size; Xn|SP is the scalar operand, Zm the vector operand,
 * and scale * msz is handed to do_mem_zpz() as the scale.  The
 * instruction is flagged as non-streaming before the access check.
 */
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    bool be_mem = s->be_data == MO_BE;
    bool mte_on = s->mte_active[0];
    gen_helper_gvec_mem_scatter *load_fn = NULL;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    if (a->esz == MO_32) {
        load_fn =
            gather_load_fn32[mte_on][be_mem][a->ff][a->xs][a->u][a->msz];
    } else if (a->esz == MO_64) {
        load_fn =
            gather_load_fn64[mte_on][be_mem][a->ff][a->xs][a->u][a->msz];
    }
    /* Any other element size, or a NULL table slot, is a decode bug. */
    assert(load_fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, load_fn);
    return true;
}
5478
/*
 * Gather load, vector base plus immediate offset.
 *
 * Not decoded when the memory element is wider than the vector element,
 * or when they are equal and the load is not unsigned.  The instruction
 * is flagged as non-streaming before the access check.
 */
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    bool be_mem = s->be_data == MO_BE;
    bool mte_on = s->mte_active[0];
    gen_helper_gvec_mem_scatter *load_fn = NULL;

    if (a->esz < a->msz) {
        return false;
    }
    if (a->esz == a->msz && !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /* Fixed xs: 0 in the 32-bit table, 2 (64-bit offset) in the 64-bit. */
    if (a->esz == MO_32) {
        load_fn = gather_load_fn32[mte_on][be_mem][a->ff][0][a->u][a->msz];
    } else if (a->esz == MO_64) {
        load_fn = gather_load_fn64[mte_on][be_mem][a->ff][2][a->u][a->msz];
    }
    assert(load_fn != NULL);

    /*
     * LD1_zpiz (zn[x] + imm) is handled exactly like LD1_zprz
     * (rn + zm[x]): the scaled immediate becomes the scalar operand
     * and Zn the vector operand.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, load_fn);
    return true;
}
5513
/*
 * LDNT1 gather load, vector base plus scalar offset (SVE2).
 *
 * Uses the non-first-fault gather helpers with Zn as the vector operand
 * and Xm (via cpu_reg) as the scalar operand, unscaled.  Not decoded
 * when esz < msz + !u.  The instruction is flagged as non-streaming
 * before the access check.
 */
static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    bool be_mem = s->be_data == MO_BE;
    bool mte_on = s->mte_active[0];
    gen_helper_gvec_mem_scatter *load_fn = NULL;

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /* ff = 0; xs = 0 in the 32-bit table, 2 in the 64-bit table. */
    if (a->esz == MO_32) {
        load_fn = gather_load_fn32[mte_on][be_mem][0][0][a->u][a->msz];
    } else if (a->esz == MO_64) {
        load_fn = gather_load_fn64[mte_on][be_mem][0][2][a->u][a->msz];
    }
    assert(load_fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, load_fn);
    return true;
}
5545
/*
 * Scatter-store helpers for 32-bit vector elements.
 *
 * Indexed by [mte][be][xs][msz].
 * The trans_ST1_zprz function in this file indexes xs with a->xs; the
 * immediate and STNT1 forms always use xs = 0.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
5579
/*
 * Scatter-store helpers for 64-bit vector elements.
 *
 * Indexed by [mte][be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset; the immediate
 * and STNT1 forms always select xs=2.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};
5637
/*
 * Scatter store, scalar base plus vector offsets.
 *
 * Not decoded when the memory element is wider than the vector element,
 * or when a scaled offset is requested for byte-sized memory elements.
 * The instruction is flagged as non-streaming before the access check.
 */
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    bool be_mem = s->be_data == MO_BE;
    bool mte_on = s->mte_active[0];
    gen_helper_gvec_mem_scatter *store_fn;

    if (a->esz < a->msz) {
        return false;
    }
    if (a->msz == 0 && a->scale) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    if (a->esz == MO_32) {
        store_fn = scatter_store_fn32[mte_on][be_mem][a->xs][a->msz];
    } else if (a->esz == MO_64) {
        store_fn = scatter_store_fn64[mte_on][be_mem][a->xs][a->msz];
    } else {
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, store_fn);
    return true;
}
5668
/*
 * Scatter store, vector base plus immediate offset.
 *
 * Not decoded when the memory element is wider than the vector element.
 * The instruction is flagged as non-streaming before the access check.
 */
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    bool be_mem = s->be_data == MO_BE;
    bool mte_on = s->mte_active[0];
    gen_helper_gvec_mem_scatter *store_fn = NULL;

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /* Fixed xs: 0 in the 32-bit table, 2 (64-bit offset) in the 64-bit. */
    if (a->esz == MO_32) {
        store_fn = scatter_store_fn32[mte_on][be_mem][0][a->msz];
    } else if (a->esz == MO_64) {
        store_fn = scatter_store_fn64[mte_on][be_mem][2][a->msz];
    }
    assert(store_fn != NULL);

    /*
     * ST1_zpiz (zn[x] + imm) is handled exactly like ST1_zprz
     * (rn + zm[x]): the scaled immediate becomes the scalar operand
     * and Zn the vector operand.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, store_fn);
    return true;
}
5703
/*
 * STNT1 scatter store, vector base plus scalar offset (SVE2).
 *
 * Uses the scatter helpers with Zn as the vector operand and Xm (via
 * cpu_reg) as the scalar operand, unscaled.  Not decoded when the
 * memory element is wider than the vector element.  The instruction is
 * flagged as non-streaming before the access check.
 */
static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    bool be_mem = s->be_data == MO_BE;
    bool mte_on = s->mte_active[0];
    gen_helper_gvec_mem_scatter *store_fn;

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    /* Fixed xs: 0 in the 32-bit table, 2 (64-bit offset) in the 64-bit. */
    if (a->esz == MO_32) {
        store_fn = scatter_store_fn32[mte_on][be_mem][0][a->msz];
    } else if (a->esz == MO_64) {
        store_fn = scatter_store_fn64[mte_on][be_mem][2][a->msz];
    } else {
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, store_fn);
    return true;
}
5736
5737 /*
5738 * Prefetches
5739 */
5740
/*
 * SVE prefetch: no code is generated for the prefetch itself; only the
 * SVE access check is performed.  Not decoded when SVE is absent.
 */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (dc_isar_feature(aa64_sve, s)) {
        /* Prefetch is a nop within QEMU. */
        (void)sve_access_check(s);
        return true;
    }
    return false;
}
5750
/*
 * SVE prefetch (scalar plus scalar): only the SVE access check is
 * performed.  Not decoded when rm is 31 or SVE is absent.
 */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    /* Prefetch is a nop within QEMU. */
    (void)sve_access_check(s);
    return true;
}
5760
/*
 * SVE prefetch, non-streaming form: flags the instruction as
 * non-streaming and then performs only the SVE access check.
 * Not decoded when SVE is absent.
 */
static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (dc_isar_feature(aa64_sve, s)) {
        /* Prefetch is a nop within QEMU. */
        s->is_nonstreaming = true;
        (void)sve_access_check(s);
        return true;
    }
    return false;
}
5771
5772 /*
5773 * Move Prefix
5774 *
5775 * TODO: The implementation so far could handle predicated merging movprfx.
5776 * The helper functions as written take an extra source register to
5777 * use in the operation, but the result is only written when predication
5778 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5779 * to allow the final write back to the destination to be unconditional.
5780 * For predicated zeroing movprfx, we need to rearrange the helpers to
5781 * allow the final write back to zero inactives.
5782 *
5783 * In the meantime, just emit the moves.
5784 */
5785
/* Unpredicated form: expanded through do_mov_z (rd <- rn). */
TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
/* Merging form: do_sel_z with rd passed as both destination and operand. */
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
/* Zeroing form: do_movz_zpz with its last argument false. */
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5789
5790 /*
5791 * SVE2 Integer Multiply - Unpredicated
5792 */
5793
/* MUL (unpredicated): expanded through the generic tcg_gen_gvec_mul. */
TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5795
5796 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
5797 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
5798 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
5799 };
5800 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5801 smulh_zzz_fns[a->esz], a, 0)
5802
5803 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
5804 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
5805 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
5806 };
5807 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5808 umulh_zzz_fns[a->esz], a, 0)
5809
/* PMUL: a single byte-sized helper is used regardless of a->esz. */
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)
5812
5813 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
5814 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
5815 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
5816 };
5817 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5818 sqdmulh_zzz_fns[a->esz], a, 0)
5819
5820 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
5821 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
5822 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
5823 };
5824 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5825 sqrdmulh_zzz_fns[a->esz], a, 0)
5826
5827 /*
5828 * SVE2 Integer - Predicated
5829 */
5830
5831 static gen_helper_gvec_4 * const sadlp_fns[4] = {
5832 NULL, gen_helper_sve2_sadalp_zpzz_h,
5833 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
5834 };
5835 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5836 sadlp_fns[a->esz], a, 0)
5837
5838 static gen_helper_gvec_4 * const uadlp_fns[4] = {
5839 NULL, gen_helper_sve2_uadalp_zpzz_h,
5840 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
5841 };
5842 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5843 uadlp_fns[a->esz], a, 0)
5844
5845 /*
5846 * SVE2 integer unary operations (predicated)
5847 */
5848
/* URECPE: only esz == 2 has a helper; every other size passes NULL. */
TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

/* URSQRTE: only esz == 2 has a helper; every other size passes NULL. */
TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
5854
5855 static gen_helper_gvec_3 * const sqabs_fns[4] = {
5856 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
5857 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
5858 };
5859 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
5860
5861 static gen_helper_gvec_3 * const sqneg_fns[4] = {
5862 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
5863 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
5864 };
5865 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
5866
/*
 * Each DO_ZPZZ line below names an instruction, the feature that gates
 * it and the helper stem it uses.  DO_ZPZZ is defined outside this
 * section; see its definition for the exact expansion.
 */

/* Signed shifts by vector. */
DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

/* Unsigned shifts by vector. */
DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

/* Signed halving add/subtract. */
DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

/* Unsigned halving add/subtract. */
DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

/* Pairwise add/max/min. */
DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

/* Saturating add/subtract (predicated forms). */
DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
5895
5896 /*
5897 * SVE2 Widening Integer Arithmetic
5898 */
5899
5900 static gen_helper_gvec_3 * const saddl_fns[4] = {
5901 NULL, gen_helper_sve2_saddl_h,
5902 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
5903 };
5904 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5905 saddl_fns[a->esz], a, 0)
5906 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5907 saddl_fns[a->esz], a, 3)
5908 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5909 saddl_fns[a->esz], a, 2)
5910
5911 static gen_helper_gvec_3 * const ssubl_fns[4] = {
5912 NULL, gen_helper_sve2_ssubl_h,
5913 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
5914 };
5915 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5916 ssubl_fns[a->esz], a, 0)
5917 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5918 ssubl_fns[a->esz], a, 3)
5919 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5920 ssubl_fns[a->esz], a, 2)
5921 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
5922 ssubl_fns[a->esz], a, 1)
5923
5924 static gen_helper_gvec_3 * const sabdl_fns[4] = {
5925 NULL, gen_helper_sve2_sabdl_h,
5926 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
5927 };
5928 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5929 sabdl_fns[a->esz], a, 0)
5930 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5931 sabdl_fns[a->esz], a, 3)
5932
5933 static gen_helper_gvec_3 * const uaddl_fns[4] = {
5934 NULL, gen_helper_sve2_uaddl_h,
5935 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
5936 };
5937 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5938 uaddl_fns[a->esz], a, 0)
5939 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5940 uaddl_fns[a->esz], a, 3)
5941
5942 static gen_helper_gvec_3 * const usubl_fns[4] = {
5943 NULL, gen_helper_sve2_usubl_h,
5944 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
5945 };
5946 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5947 usubl_fns[a->esz], a, 0)
5948 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5949 usubl_fns[a->esz], a, 3)
5950
5951 static gen_helper_gvec_3 * const uabdl_fns[4] = {
5952 NULL, gen_helper_sve2_uabdl_h,
5953 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
5954 };
5955 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5956 uabdl_fns[a->esz], a, 0)
5957 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5958 uabdl_fns[a->esz], a, 3)
5959
5960 static gen_helper_gvec_3 * const sqdmull_fns[4] = {
5961 NULL, gen_helper_sve2_sqdmull_zzz_h,
5962 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
5963 };
5964 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5965 sqdmull_fns[a->esz], a, 0)
5966 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5967 sqdmull_fns[a->esz], a, 3)
5968
5969 static gen_helper_gvec_3 * const smull_fns[4] = {
5970 NULL, gen_helper_sve2_smull_zzz_h,
5971 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
5972 };
5973 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5974 smull_fns[a->esz], a, 0)
5975 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5976 smull_fns[a->esz], a, 3)
5977
5978 static gen_helper_gvec_3 * const umull_fns[4] = {
5979 NULL, gen_helper_sve2_umull_zzz_h,
5980 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
5981 };
5982 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5983 umull_fns[a->esz], a, 0)
5984 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5985 umull_fns[a->esz], a, 3)
5986
5987 static gen_helper_gvec_3 * const eoril_fns[4] = {
5988 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
5989 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
5990 };
5991 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
5992 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
5993
do_trans_pmull(DisasContext * s,arg_rrr_esz * a,bool sel)5994 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
5995 {
5996 static gen_helper_gvec_3 * const fns[4] = {
5997 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
5998 NULL, gen_helper_sve2_pmull_d,
5999 };
6000
6001 if (a->esz == 0) {
6002 if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
6003 return false;
6004 }
6005 s->is_nonstreaming = true;
6006 } else if (!dc_isar_feature(aa64_sve, s)) {
6007 return false;
6008 }
6009 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
6010 }
6011
6012 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6013 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
6014
6015 static gen_helper_gvec_3 * const saddw_fns[4] = {
6016 NULL, gen_helper_sve2_saddw_h,
6017 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6018 };
6019 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6020 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
6021
6022 static gen_helper_gvec_3 * const ssubw_fns[4] = {
6023 NULL, gen_helper_sve2_ssubw_h,
6024 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6025 };
6026 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6027 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
6028
6029 static gen_helper_gvec_3 * const uaddw_fns[4] = {
6030 NULL, gen_helper_sve2_uaddw_h,
6031 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6032 };
6033 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6034 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
6035
6036 static gen_helper_gvec_3 * const usubw_fns[4] = {
6037 NULL, gen_helper_sve2_usubw_h,
6038 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6039 };
6040 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6041 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
6042
/*
 * Vector expansion of signed shift-left-long.
 *
 * imm packs the shift count in bits [n:1] and the top/bottom selector
 * in bit 0.  vece is the wide element size, so each source half is
 * 4 << vece bits.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    const int half = 4 << vece;
    const int shift = imm >> 1;
    const bool is_top = imm & 1;

    if (!is_top) {
        /* Move the low half to the top, then sign-extend back down. */
        tcg_gen_shli_vec(vece, d, n, half);
        tcg_gen_sari_vec(vece, d, d, half - shift);
    } else if (shift != half) {
        /* Sign-extend the high half, then apply the shift. */
        tcg_gen_sari_vec(vece, d, n, half);
        tcg_gen_shli_vec(vece, d, d, shift);
    } else {
        /* The high half stays in place; only the low half is cleared. */
        TCGv_vec hi_mask = tcg_temp_new_vec_matching(d);

        tcg_gen_dupi_vec(vece, hi_mask, MAKE_64BIT_MASK(half, half));
        tcg_gen_and_vec(vece, d, n, hi_mask);
    }
}
6063
/*
 * 64-bit integer expansion of unsigned shift-left-long.
 *
 * imm packs the shift count above bit 0 and the top/bottom selector in
 * bit 0.  A single net shift is applied to the whole 64-bit value, and
 * the bits that strayed out of each element's result field are masked
 * off afterwards.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    const int half = 4 << vece;
    const int shl = imm >> 1;
    /* For the top half, the element first has to come down by 'half'. */
    const int net = (imm & 1) ? shl - half : shl;
    const uint64_t keep = dup_const(vece, MAKE_64BIT_MASK(0, half) << shl);

    if (net >= 0) {
        tcg_gen_shli_i64(d, n, net);
    } else {
        tcg_gen_shri_i64(d, n, -net);
    }
    tcg_gen_andi_i64(d, d, keep);
}
6084
/*
 * Fixed-size adapters around gen_ushll_i64; they are installed as the
 * .fni8 callbacks of ushll_ops[].  The int64_t immediate is narrowed to
 * the int parameter of gen_ushll_i64.
 */

/* Wide element size MO_16. */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

/* Wide element size MO_32. */
static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

/* Wide element size MO_64. */
static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
6099
/*
 * Vector expansion of unsigned shift-left-long.
 *
 * imm packs the shift count above bit 0 and the top/bottom selector in
 * bit 0.  The two cases where the selected half already sits in its
 * final position are done with a single AND.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    const int half = 4 << vece;
    const int shift = imm >> 1;
    const bool is_top = imm & 1;

    if (is_top && shift == half) {
        /* High half stays put: clear the low half. */
        TCGv_vec hi_mask = tcg_temp_new_vec_matching(d);

        tcg_gen_dupi_vec(vece, hi_mask, MAKE_64BIT_MASK(half, half));
        tcg_gen_and_vec(vece, d, n, hi_mask);
    } else if (is_top) {
        /* Zero-extend the high half, then apply the shift. */
        tcg_gen_shri_vec(vece, d, n, half);
        tcg_gen_shli_vec(vece, d, d, shift);
    } else if (shift == 0) {
        /* Low half stays put: clear the high half. */
        TCGv_vec lo_mask = tcg_temp_new_vec_matching(d);

        tcg_gen_dupi_vec(vece, lo_mask, MAKE_64BIT_MASK(0, half));
        tcg_gen_and_vec(vece, d, n, lo_mask);
    } else {
        /* Push the low half to the top, then bring it back zero-filled. */
        tcg_gen_shli_vec(vece, d, n, half);
        tcg_gen_shri_vec(vece, d, d, half - shift);
    }
}
6126
/*
 * Common expander for the shift-left-long bottom/top instructions.
 *
 * ops is indexed by a->esz, which must be in [0, 2]; anything else is
 * rejected.  The immediate handed to the gvec expander carries the shift
 * count above bit 0 and sel (false = bottom, true = top) in bit 0.
 */
static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    unsigned vsz;

    if (!(a->esz >= 0 && a->esz <= 2)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                    vec_full_reg_offset(s, a->rn),
                    vsz, vsz, (a->imm << 1) | sel,
                    &ops[a->esz]);
    return true;
}
6143
6144 static const TCGOpcode sshll_list[] = {
6145 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6146 };
6147 static const GVecGen2i sshll_ops[3] = {
6148 { .fniv = gen_sshll_vec,
6149 .opt_opc = sshll_list,
6150 .fno = gen_helper_sve2_sshll_h,
6151 .vece = MO_16 },
6152 { .fniv = gen_sshll_vec,
6153 .opt_opc = sshll_list,
6154 .fno = gen_helper_sve2_sshll_s,
6155 .vece = MO_32 },
6156 { .fniv = gen_sshll_vec,
6157 .opt_opc = sshll_list,
6158 .fno = gen_helper_sve2_sshll_d,
6159 .vece = MO_64 }
6160 };
6161 TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
6162 TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
6163
6164 static const TCGOpcode ushll_list[] = {
6165 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6166 };
6167 static const GVecGen2i ushll_ops[3] = {
6168 { .fni8 = gen_ushll16_i64,
6169 .fniv = gen_ushll_vec,
6170 .opt_opc = ushll_list,
6171 .fno = gen_helper_sve2_ushll_h,
6172 .vece = MO_16 },
6173 { .fni8 = gen_ushll32_i64,
6174 .fniv = gen_ushll_vec,
6175 .opt_opc = ushll_list,
6176 .fno = gen_helper_sve2_ushll_s,
6177 .vece = MO_32 },
6178 { .fni8 = gen_ushll64_i64,
6179 .fniv = gen_ushll_vec,
6180 .opt_opc = ushll_list,
6181 .fno = gen_helper_sve2_ushll_d,
6182 .vece = MO_64 },
6183 };
6184 TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
6185 TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
6186
6187 static gen_helper_gvec_3 * const bext_fns[4] = {
6188 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6189 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6190 };
6191 TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6192 bext_fns[a->esz], a, 0)
6193
6194 static gen_helper_gvec_3 * const bdep_fns[4] = {
6195 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6196 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6197 };
6198 TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6199 bdep_fns[a->esz], a, 0)
6200
6201 static gen_helper_gvec_3 * const bgrp_fns[4] = {
6202 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6203 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6204 };
6205 TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6206 bgrp_fns[a->esz], a, 0)
6207
6208 static gen_helper_gvec_3 * const cadd_fns[4] = {
6209 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6210 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6211 };
6212 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6213 cadd_fns[a->esz], a, 0)
6214 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6215 cadd_fns[a->esz], a, 1)
6216
6217 static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6218 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6219 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6220 };
6221 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6222 sqcadd_fns[a->esz], a, 0)
6223 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6224 sqcadd_fns[a->esz], a, 1)
6225
6226 static gen_helper_gvec_4 * const sabal_fns[4] = {
6227 NULL, gen_helper_sve2_sabal_h,
6228 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6229 };
6230 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6231 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
6232
6233 static gen_helper_gvec_4 * const uabal_fns[4] = {
6234 NULL, gen_helper_sve2_uabal_h,
6235 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6236 };
6237 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6238 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
6239
do_adcl(DisasContext * s,arg_rrrr_esz * a,bool sel)6240 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6241 {
6242 static gen_helper_gvec_4 * const fns[2] = {
6243 gen_helper_sve2_adcl_s,
6244 gen_helper_sve2_adcl_d,
6245 };
6246 /*
6247 * Note that in this case the ESZ field encodes both size and sign.
6248 * Split out 'subtract' into bit 1 of the data field for the helper.
6249 */
6250 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
6251 }
6252
TRANS_FEAT(ADCLB,aa64_sve2,do_adcl,a,false)6253 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6254 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
6255
/* Immediate shift forms, each expanded by the named gen_gvec_* function. */
TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

/* Three-vector forms, each expanded by the named gen_gvec_* function. */
TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
6265
6266 static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
6267 const GVecGen2 ops[3])
6268 {
6269 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
6270 return false;
6271 }
6272 if (sve_access_check(s)) {
6273 unsigned vsz = vec_full_reg_size(s);
6274 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6275 vec_full_reg_offset(s, a->rn),
6276 vsz, vsz, &ops[a->esz]);
6277 }
6278 return true;
6279 }
6280
6281 static const TCGOpcode sqxtn_list[] = {
6282 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6283 };
6284
gen_sqxtnb_vec(unsigned vece,TCGv_vec d,TCGv_vec n)6285 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6286 {
6287 TCGv_vec t = tcg_temp_new_vec_matching(d);
6288 int halfbits = 4 << vece;
6289 int64_t mask = (1ull << halfbits) - 1;
6290 int64_t min = -1ull << (halfbits - 1);
6291 int64_t max = -min - 1;
6292
6293 tcg_gen_dupi_vec(vece, t, min);
6294 tcg_gen_smax_vec(vece, d, n, t);
6295 tcg_gen_dupi_vec(vece, t, max);
6296 tcg_gen_smin_vec(vece, d, d, t);
6297 tcg_gen_dupi_vec(vece, t, mask);
6298 tcg_gen_and_vec(vece, d, d, t);
6299 }
6300
6301 static const GVecGen2 sqxtnb_ops[3] = {
6302 { .fniv = gen_sqxtnb_vec,
6303 .opt_opc = sqxtn_list,
6304 .fno = gen_helper_sve2_sqxtnb_h,
6305 .vece = MO_16 },
6306 { .fniv = gen_sqxtnb_vec,
6307 .opt_opc = sqxtn_list,
6308 .fno = gen_helper_sve2_sqxtnb_s,
6309 .vece = MO_32 },
6310 { .fniv = gen_sqxtnb_vec,
6311 .opt_opc = sqxtn_list,
6312 .fno = gen_helper_sve2_sqxtnb_d,
6313 .vece = MO_64 },
6314 };
TRANS_FEAT(SQXTNB,aa64_sve2,do_narrow_extract,a,sqxtnb_ops)6315 TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
6316
6317 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6318 {
6319 TCGv_vec t = tcg_temp_new_vec_matching(d);
6320 int halfbits = 4 << vece;
6321 int64_t mask = (1ull << halfbits) - 1;
6322 int64_t min = -1ull << (halfbits - 1);
6323 int64_t max = -min - 1;
6324
6325 tcg_gen_dupi_vec(vece, t, min);
6326 tcg_gen_smax_vec(vece, n, n, t);
6327 tcg_gen_dupi_vec(vece, t, max);
6328 tcg_gen_smin_vec(vece, n, n, t);
6329 tcg_gen_shli_vec(vece, n, n, halfbits);
6330 tcg_gen_dupi_vec(vece, t, mask);
6331 tcg_gen_bitsel_vec(vece, d, t, d, n);
6332 }
6333
6334 static const GVecGen2 sqxtnt_ops[3] = {
6335 { .fniv = gen_sqxtnt_vec,
6336 .opt_opc = sqxtn_list,
6337 .load_dest = true,
6338 .fno = gen_helper_sve2_sqxtnt_h,
6339 .vece = MO_16 },
6340 { .fniv = gen_sqxtnt_vec,
6341 .opt_opc = sqxtn_list,
6342 .load_dest = true,
6343 .fno = gen_helper_sve2_sqxtnt_s,
6344 .vece = MO_32 },
6345 { .fniv = gen_sqxtnt_vec,
6346 .opt_opc = sqxtn_list,
6347 .load_dest = true,
6348 .fno = gen_helper_sve2_sqxtnt_d,
6349 .vece = MO_64 },
6350 };
6351 TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
6352
6353 static const TCGOpcode uqxtn_list[] = {
6354 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6355 };
6356
gen_uqxtnb_vec(unsigned vece,TCGv_vec d,TCGv_vec n)6357 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6358 {
6359 TCGv_vec t = tcg_temp_new_vec_matching(d);
6360 int halfbits = 4 << vece;
6361 int64_t max = (1ull << halfbits) - 1;
6362
6363 tcg_gen_dupi_vec(vece, t, max);
6364 tcg_gen_umin_vec(vece, d, n, t);
6365 }
6366
6367 static const GVecGen2 uqxtnb_ops[3] = {
6368 { .fniv = gen_uqxtnb_vec,
6369 .opt_opc = uqxtn_list,
6370 .fno = gen_helper_sve2_uqxtnb_h,
6371 .vece = MO_16 },
6372 { .fniv = gen_uqxtnb_vec,
6373 .opt_opc = uqxtn_list,
6374 .fno = gen_helper_sve2_uqxtnb_s,
6375 .vece = MO_32 },
6376 { .fniv = gen_uqxtnb_vec,
6377 .opt_opc = uqxtn_list,
6378 .fno = gen_helper_sve2_uqxtnb_d,
6379 .vece = MO_64 },
6380 };
TRANS_FEAT(UQXTNB,aa64_sve2,do_narrow_extract,a,uqxtnb_ops)6381 TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
6382
6383 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6384 {
6385 TCGv_vec t = tcg_temp_new_vec_matching(d);
6386 int halfbits = 4 << vece;
6387 int64_t max = (1ull << halfbits) - 1;
6388
6389 tcg_gen_dupi_vec(vece, t, max);
6390 tcg_gen_umin_vec(vece, n, n, t);
6391 tcg_gen_shli_vec(vece, n, n, halfbits);
6392 tcg_gen_bitsel_vec(vece, d, t, d, n);
6393 }
6394
6395 static const GVecGen2 uqxtnt_ops[3] = {
6396 { .fniv = gen_uqxtnt_vec,
6397 .opt_opc = uqxtn_list,
6398 .load_dest = true,
6399 .fno = gen_helper_sve2_uqxtnt_h,
6400 .vece = MO_16 },
6401 { .fniv = gen_uqxtnt_vec,
6402 .opt_opc = uqxtn_list,
6403 .load_dest = true,
6404 .fno = gen_helper_sve2_uqxtnt_s,
6405 .vece = MO_32 },
6406 { .fniv = gen_uqxtnt_vec,
6407 .opt_opc = uqxtn_list,
6408 .load_dest = true,
6409 .fno = gen_helper_sve2_uqxtnt_d,
6410 .vece = MO_64 },
6411 };
6412 TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
6413
6414 static const TCGOpcode sqxtun_list[] = {
6415 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6416 };
6417
gen_sqxtunb_vec(unsigned vece,TCGv_vec d,TCGv_vec n)6418 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6419 {
6420 TCGv_vec t = tcg_temp_new_vec_matching(d);
6421 int halfbits = 4 << vece;
6422 int64_t max = (1ull << halfbits) - 1;
6423
6424 tcg_gen_dupi_vec(vece, t, 0);
6425 tcg_gen_smax_vec(vece, d, n, t);
6426 tcg_gen_dupi_vec(vece, t, max);
6427 tcg_gen_umin_vec(vece, d, d, t);
6428 }
6429
6430 static const GVecGen2 sqxtunb_ops[3] = {
6431 { .fniv = gen_sqxtunb_vec,
6432 .opt_opc = sqxtun_list,
6433 .fno = gen_helper_sve2_sqxtunb_h,
6434 .vece = MO_16 },
6435 { .fniv = gen_sqxtunb_vec,
6436 .opt_opc = sqxtun_list,
6437 .fno = gen_helper_sve2_sqxtunb_s,
6438 .vece = MO_32 },
6439 { .fniv = gen_sqxtunb_vec,
6440 .opt_opc = sqxtun_list,
6441 .fno = gen_helper_sve2_sqxtunb_d,
6442 .vece = MO_64 },
6443 };
TRANS_FEAT(SQXTUNB,aa64_sve2,do_narrow_extract,a,sqxtunb_ops)6444 TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
6445
6446 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6447 {
6448 TCGv_vec t = tcg_temp_new_vec_matching(d);
6449 int halfbits = 4 << vece;
6450 int64_t max = (1ull << halfbits) - 1;
6451
6452 tcg_gen_dupi_vec(vece, t, 0);
6453 tcg_gen_smax_vec(vece, n, n, t);
6454 tcg_gen_dupi_vec(vece, t, max);
6455 tcg_gen_umin_vec(vece, n, n, t);
6456 tcg_gen_shli_vec(vece, n, n, halfbits);
6457 tcg_gen_bitsel_vec(vece, d, t, d, n);
6458 }
6459
6460 static const GVecGen2 sqxtunt_ops[3] = {
6461 { .fniv = gen_sqxtunt_vec,
6462 .opt_opc = sqxtun_list,
6463 .load_dest = true,
6464 .fno = gen_helper_sve2_sqxtunt_h,
6465 .vece = MO_16 },
6466 { .fniv = gen_sqxtunt_vec,
6467 .opt_opc = sqxtun_list,
6468 .load_dest = true,
6469 .fno = gen_helper_sve2_sqxtunt_s,
6470 .vece = MO_32 },
6471 { .fniv = gen_sqxtunt_vec,
6472 .opt_opc = sqxtun_list,
6473 .load_dest = true,
6474 .fno = gen_helper_sve2_sqxtunt_d,
6475 .vece = MO_64 },
6476 };
TRANS_FEAT(SQXTUNT,aa64_sve2,do_narrow_extract,a,sqxtunt_ops)6477 TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
6478
6479 static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
6480 const GVecGen2i ops[3])
6481 {
6482 if (a->esz < 0 || a->esz > MO_32) {
6483 return false;
6484 }
6485 assert(a->imm > 0 && a->imm <= (8 << a->esz));
6486 if (sve_access_check(s)) {
6487 unsigned vsz = vec_full_reg_size(s);
6488 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6489 vec_full_reg_offset(s, a->rn),
6490 vsz, vsz, a->imm, &ops[a->esz]);
6491 }
6492 return true;
6493 }
6494
/*
 * 64-bit integer expansion of SHRNB: shift the whole value right, then
 * keep only the low half of every vece-sized element.
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    const uint64_t low_halves =
        dup_const(vece, MAKE_64BIT_MASK(0, 4 << vece));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, low_halves);
}
6503
/*
 * Fixed-size adapters around gen_shrnb_i64; they are installed as the
 * .fni8 callbacks of shrnb_ops[].  The int64_t shift count is narrowed
 * to the int parameter of gen_shrnb_i64.
 */

/* Wide element size MO_16. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

/* Wide element size MO_32. */
static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

/* Wide element size MO_64. */
static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}
6518
/*
 * Vector expansion of SHRNB: logical shift right, then keep the low
 * half of each element.  n is overwritten with the shifted value.
 */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    const uint64_t lo_bits = MAKE_64BIT_MASK(0, 4 << vece);
    TCGv_vec lo_mask = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, lo_mask, lo_bits);
    tcg_gen_and_vec(vece, d, n, lo_mask);
}
6529
6530 static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
6531 static const GVecGen2i shrnb_ops[3] = {
6532 { .fni8 = gen_shrnb16_i64,
6533 .fniv = gen_shrnb_vec,
6534 .opt_opc = shrnb_vec_list,
6535 .fno = gen_helper_sve2_shrnb_h,
6536 .vece = MO_16 },
6537 { .fni8 = gen_shrnb32_i64,
6538 .fniv = gen_shrnb_vec,
6539 .opt_opc = shrnb_vec_list,
6540 .fno = gen_helper_sve2_shrnb_s,
6541 .vece = MO_32 },
6542 { .fni8 = gen_shrnb64_i64,
6543 .fniv = gen_shrnb_vec,
6544 .opt_opc = shrnb_vec_list,
6545 .fno = gen_helper_sve2_shrnb_d,
6546 .vece = MO_64 },
6547 };
TRANS_FEAT(SHRNB,aa64_sve2,do_shr_narrow,a,shrnb_ops)6548 TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
6549
6550 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6551 {
6552 int halfbits = 4 << vece;
6553 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6554
6555 tcg_gen_shli_i64(n, n, halfbits - shr);
6556 tcg_gen_andi_i64(n, n, ~mask);
6557 tcg_gen_andi_i64(d, d, mask);
6558 tcg_gen_or_i64(d, d, n);
6559 }
6560
/*
 * .fni8 callbacks for shrnt_ops[].  The MO_16 and MO_32 forms defer to
 * gen_shrnt_i64; the MO_64 form has a dedicated two-op expansion.
 */

/* Wide element size MO_16. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

/* Wide element size MO_32. */
static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/*
 * Wide element size MO_64: shift n right (overwriting n), then deposit
 * its low 32 bits into bits [63:32] of d; the low 32 bits of d are kept.
 */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
6576
/*
 * Vector expansion of SHRNT: shifting left by (halfbits - shr) places
 * the narrowed result in the high half of each element, which is then
 * merged with the low half of d.  n is overwritten in the process.
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    const int half = 4 << vece;
    const uint64_t lo_bits = MAKE_64BIT_MASK(0, half);
    TCGv_vec lo_mask = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, n, n, half - shr);
    tcg_gen_dupi_vec(vece, lo_mask, lo_bits);
    /* Select d where the low-half mask is set, n elsewhere. */
    tcg_gen_bitsel_vec(vece, d, lo_mask, d, n);
}
6587
6588 static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
6589 static const GVecGen2i shrnt_ops[3] = {
6590 { .fni8 = gen_shrnt16_i64,
6591 .fniv = gen_shrnt_vec,
6592 .opt_opc = shrnt_vec_list,
6593 .load_dest = true,
6594 .fno = gen_helper_sve2_shrnt_h,
6595 .vece = MO_16 },
6596 { .fni8 = gen_shrnt32_i64,
6597 .fniv = gen_shrnt_vec,
6598 .opt_opc = shrnt_vec_list,
6599 .load_dest = true,
6600 .fno = gen_helper_sve2_shrnt_s,
6601 .vece = MO_32 },
6602 { .fni8 = gen_shrnt64_i64,
6603 .fniv = gen_shrnt_vec,
6604 .opt_opc = shrnt_vec_list,
6605 .load_dest = true,
6606 .fno = gen_helper_sve2_shrnt_d,
6607 .vece = MO_64 },
6608 };
6609 TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
6610
6611 static const GVecGen2i rshrnb_ops[3] = {
6612 { .fno = gen_helper_sve2_rshrnb_h },
6613 { .fno = gen_helper_sve2_rshrnb_s },
6614 { .fno = gen_helper_sve2_rshrnb_d },
6615 };
6616 TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)
6617
6618 static const GVecGen2i rshrnt_ops[3] = {
6619 { .fno = gen_helper_sve2_rshrnt_h },
6620 { .fno = gen_helper_sve2_rshrnt_s },
6621 { .fno = gen_helper_sve2_rshrnt_d },
6622 };
TRANS_FEAT(RSHRNT,aa64_sve2,do_shr_narrow,a,rshrnt_ops)6623 TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
6624
6625 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
6626 TCGv_vec n, int64_t shr)
6627 {
6628 TCGv_vec t = tcg_temp_new_vec_matching(d);
6629 int halfbits = 4 << vece;
6630
6631 tcg_gen_sari_vec(vece, n, n, shr);
6632 tcg_gen_dupi_vec(vece, t, 0);
6633 tcg_gen_smax_vec(vece, n, n, t);
6634 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6635 tcg_gen_umin_vec(vece, d, n, t);
6636 }
6637
6638 static const TCGOpcode sqshrunb_vec_list[] = {
6639 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6640 };
6641 static const GVecGen2i sqshrunb_ops[3] = {
6642 { .fniv = gen_sqshrunb_vec,
6643 .opt_opc = sqshrunb_vec_list,
6644 .fno = gen_helper_sve2_sqshrunb_h,
6645 .vece = MO_16 },
6646 { .fniv = gen_sqshrunb_vec,
6647 .opt_opc = sqshrunb_vec_list,
6648 .fno = gen_helper_sve2_sqshrunb_s,
6649 .vece = MO_32 },
6650 { .fniv = gen_sqshrunb_vec,
6651 .opt_opc = sqshrunb_vec_list,
6652 .fno = gen_helper_sve2_sqshrunb_d,
6653 .vece = MO_64 },
6654 };
TRANS_FEAT(SQSHRUNB,aa64_sve2,do_shr_narrow,a,sqshrunb_ops)6655 TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
6656
6657 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
6658 TCGv_vec n, int64_t shr)
6659 {
6660 TCGv_vec t = tcg_temp_new_vec_matching(d);
6661 int halfbits = 4 << vece;
6662
6663 tcg_gen_sari_vec(vece, n, n, shr);
6664 tcg_gen_dupi_vec(vece, t, 0);
6665 tcg_gen_smax_vec(vece, n, n, t);
6666 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6667 tcg_gen_umin_vec(vece, n, n, t);
6668 tcg_gen_shli_vec(vece, n, n, halfbits);
6669 tcg_gen_bitsel_vec(vece, d, t, d, n);
6670 }
6671
6672 static const TCGOpcode sqshrunt_vec_list[] = {
6673 INDEX_op_shli_vec, INDEX_op_sari_vec,
6674 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6675 };
6676 static const GVecGen2i sqshrunt_ops[3] = {
6677 { .fniv = gen_sqshrunt_vec,
6678 .opt_opc = sqshrunt_vec_list,
6679 .load_dest = true,
6680 .fno = gen_helper_sve2_sqshrunt_h,
6681 .vece = MO_16 },
6682 { .fniv = gen_sqshrunt_vec,
6683 .opt_opc = sqshrunt_vec_list,
6684 .load_dest = true,
6685 .fno = gen_helper_sve2_sqshrunt_s,
6686 .vece = MO_32 },
6687 { .fniv = gen_sqshrunt_vec,
6688 .opt_opc = sqshrunt_vec_list,
6689 .load_dest = true,
6690 .fno = gen_helper_sve2_sqshrunt_d,
6691 .vece = MO_64 },
6692 };
6693 TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
6694
/*
 * SQRSHRUNB: only out-of-line helpers (_h, _s, _d) are provided;
 * no inline vector expansion is listed for this operation.
 */
static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

/* SQRSHRUNT: likewise helper-only. */
static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
6708
6709 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
6710 TCGv_vec n, int64_t shr)
6711 {
6712 TCGv_vec t = tcg_temp_new_vec_matching(d);
6713 int halfbits = 4 << vece;
6714 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6715 int64_t min = -max - 1;
6716
6717 tcg_gen_sari_vec(vece, n, n, shr);
6718 tcg_gen_dupi_vec(vece, t, min);
6719 tcg_gen_smax_vec(vece, n, n, t);
6720 tcg_gen_dupi_vec(vece, t, max);
6721 tcg_gen_smin_vec(vece, n, n, t);
6722 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6723 tcg_gen_and_vec(vece, d, n, t);
6724 }
6725
6726 static const TCGOpcode sqshrnb_vec_list[] = {
6727 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6728 };
6729 static const GVecGen2i sqshrnb_ops[3] = {
6730 { .fniv = gen_sqshrnb_vec,
6731 .opt_opc = sqshrnb_vec_list,
6732 .fno = gen_helper_sve2_sqshrnb_h,
6733 .vece = MO_16 },
6734 { .fniv = gen_sqshrnb_vec,
6735 .opt_opc = sqshrnb_vec_list,
6736 .fno = gen_helper_sve2_sqshrnb_s,
6737 .vece = MO_32 },
6738 { .fniv = gen_sqshrnb_vec,
6739 .opt_opc = sqshrnb_vec_list,
6740 .fno = gen_helper_sve2_sqshrnb_d,
6741 .vece = MO_64 },
6742 };
TRANS_FEAT(SQSHRNB,aa64_sve2,do_shr_narrow,a,sqshrnb_ops)6743 TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
6744
6745 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
6746 TCGv_vec n, int64_t shr)
6747 {
6748 TCGv_vec t = tcg_temp_new_vec_matching(d);
6749 int halfbits = 4 << vece;
6750 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6751 int64_t min = -max - 1;
6752
6753 tcg_gen_sari_vec(vece, n, n, shr);
6754 tcg_gen_dupi_vec(vece, t, min);
6755 tcg_gen_smax_vec(vece, n, n, t);
6756 tcg_gen_dupi_vec(vece, t, max);
6757 tcg_gen_smin_vec(vece, n, n, t);
6758 tcg_gen_shli_vec(vece, n, n, halfbits);
6759 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6760 tcg_gen_bitsel_vec(vece, d, t, d, n);
6761 }
6762
6763 static const TCGOpcode sqshrnt_vec_list[] = {
6764 INDEX_op_shli_vec, INDEX_op_sari_vec,
6765 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6766 };
6767 static const GVecGen2i sqshrnt_ops[3] = {
6768 { .fniv = gen_sqshrnt_vec,
6769 .opt_opc = sqshrnt_vec_list,
6770 .load_dest = true,
6771 .fno = gen_helper_sve2_sqshrnt_h,
6772 .vece = MO_16 },
6773 { .fniv = gen_sqshrnt_vec,
6774 .opt_opc = sqshrnt_vec_list,
6775 .load_dest = true,
6776 .fno = gen_helper_sve2_sqshrnt_s,
6777 .vece = MO_32 },
6778 { .fniv = gen_sqshrnt_vec,
6779 .opt_opc = sqshrnt_vec_list,
6780 .load_dest = true,
6781 .fno = gen_helper_sve2_sqshrnt_d,
6782 .vece = MO_64 },
6783 };
6784 TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
6785
/*
 * SQRSHRNB: only out-of-line helpers (_h, _s, _d) are provided;
 * no inline vector expansion is listed for this operation.
 */
static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

/* SQRSHRNT: likewise helper-only. */
static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
6799
6800 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
6801 TCGv_vec n, int64_t shr)
6802 {
6803 TCGv_vec t = tcg_temp_new_vec_matching(d);
6804 int halfbits = 4 << vece;
6805
6806 tcg_gen_shri_vec(vece, n, n, shr);
6807 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6808 tcg_gen_umin_vec(vece, d, n, t);
6809 }
6810
6811 static const TCGOpcode uqshrnb_vec_list[] = {
6812 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6813 };
6814 static const GVecGen2i uqshrnb_ops[3] = {
6815 { .fniv = gen_uqshrnb_vec,
6816 .opt_opc = uqshrnb_vec_list,
6817 .fno = gen_helper_sve2_uqshrnb_h,
6818 .vece = MO_16 },
6819 { .fniv = gen_uqshrnb_vec,
6820 .opt_opc = uqshrnb_vec_list,
6821 .fno = gen_helper_sve2_uqshrnb_s,
6822 .vece = MO_32 },
6823 { .fniv = gen_uqshrnb_vec,
6824 .opt_opc = uqshrnb_vec_list,
6825 .fno = gen_helper_sve2_uqshrnb_d,
6826 .vece = MO_64 },
6827 };
TRANS_FEAT(UQSHRNB,aa64_sve2,do_shr_narrow,a,uqshrnb_ops)6828 TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
6829
6830 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
6831 TCGv_vec n, int64_t shr)
6832 {
6833 TCGv_vec t = tcg_temp_new_vec_matching(d);
6834 int halfbits = 4 << vece;
6835
6836 tcg_gen_shri_vec(vece, n, n, shr);
6837 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6838 tcg_gen_umin_vec(vece, n, n, t);
6839 tcg_gen_shli_vec(vece, n, n, halfbits);
6840 tcg_gen_bitsel_vec(vece, d, t, d, n);
6841 }
6842
6843 static const TCGOpcode uqshrnt_vec_list[] = {
6844 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6845 };
6846 static const GVecGen2i uqshrnt_ops[3] = {
6847 { .fniv = gen_uqshrnt_vec,
6848 .opt_opc = uqshrnt_vec_list,
6849 .load_dest = true,
6850 .fno = gen_helper_sve2_uqshrnt_h,
6851 .vece = MO_16 },
6852 { .fniv = gen_uqshrnt_vec,
6853 .opt_opc = uqshrnt_vec_list,
6854 .load_dest = true,
6855 .fno = gen_helper_sve2_uqshrnt_s,
6856 .vece = MO_32 },
6857 { .fniv = gen_uqshrnt_vec,
6858 .opt_opc = uqshrnt_vec_list,
6859 .load_dest = true,
6860 .fno = gen_helper_sve2_uqshrnt_d,
6861 .vece = MO_64 },
6862 };
6863 TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
6864
/*
 * UQRSHRNB: only out-of-line helpers (_h, _s, _d) are provided;
 * no inline vector expansion is listed for this operation.
 */
static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

/* UQRSHRNT: likewise helper-only. */
static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
6878
6879 #define DO_SVE2_ZZZ_NARROW(NAME, name) \
6880 static gen_helper_gvec_3 * const name##_fns[4] = { \
6881 NULL, gen_helper_sve2_##name##_h, \
6882 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6883 }; \
6884 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
6885 name##_fns[a->esz], a, 0)
6886
6887 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
6888 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
6889 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
6890 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
6891
6892 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
6893 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
6894 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
6895 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
6896
6897 static gen_helper_gvec_flags_4 * const match_fns[4] = {
6898 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
6899 };
6900 TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
6901
6902 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
6903 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
6904 };
6905 TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
6906
6907 static gen_helper_gvec_4 * const histcnt_fns[4] = {
6908 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
6909 };
6910 TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
6911 histcnt_fns[a->esz], a, 0)
6912
/* HISTSEG: a helper is supplied only when a->esz == 0; otherwise NULL. */
TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

/*
 * SVE2 pairwise floating-point operations, instantiated through
 * DO_ZPZZ_FP (defined elsewhere in this file) with the aa64_sve2
 * feature test.
 */
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
6921
6922 /*
6923 * SVE Integer Multiply-Add (unpredicated)
6924 */
6925
/*
 * FMMLA, single and double precision.  Each variant has its own
 * feature test (aa64_sve_f32mm / aa64_sve_f64mm) and uses the
 * FPST_FPCR float status with a data value of 0.
 */
TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
6932
6933 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
6934 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
6935 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
6936 };
6937 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6938 sqdmlal_zzzw_fns[a->esz], a, 0)
6939 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6940 sqdmlal_zzzw_fns[a->esz], a, 3)
6941 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
6942 sqdmlal_zzzw_fns[a->esz], a, 2)
6943
6944 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
6945 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
6946 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
6947 };
6948 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6949 sqdmlsl_zzzw_fns[a->esz], a, 0)
6950 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6951 sqdmlsl_zzzw_fns[a->esz], a, 3)
6952 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
6953 sqdmlsl_zzzw_fns[a->esz], a, 2)
6954
6955 static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
6956 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
6957 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
6958 };
6959 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
6960 sqrdmlah_fns[a->esz], a, 0)
6961
6962 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
6963 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
6964 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
6965 };
6966 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
6967 sqrdmlsh_fns[a->esz], a, 0)
6968
6969 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
6970 NULL, gen_helper_sve2_smlal_zzzw_h,
6971 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
6972 };
6973 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6974 smlal_zzzw_fns[a->esz], a, 0)
6975 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6976 smlal_zzzw_fns[a->esz], a, 1)
6977
6978 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
6979 NULL, gen_helper_sve2_umlal_zzzw_h,
6980 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
6981 };
6982 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6983 umlal_zzzw_fns[a->esz], a, 0)
6984 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6985 umlal_zzzw_fns[a->esz], a, 1)
6986
6987 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
6988 NULL, gen_helper_sve2_smlsl_zzzw_h,
6989 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
6990 };
6991 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6992 smlsl_zzzw_fns[a->esz], a, 0)
6993 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
6994 smlsl_zzzw_fns[a->esz], a, 1)
6995
6996 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
6997 NULL, gen_helper_sve2_umlsl_zzzw_h,
6998 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
6999 };
7000 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7001 umlsl_zzzw_fns[a->esz], a, 0)
7002 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7003 umlsl_zzzw_fns[a->esz], a, 1)
7004
7005 static gen_helper_gvec_4 * const cmla_fns[] = {
7006 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7007 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7008 };
7009 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7010 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7011
7012 static gen_helper_gvec_4 * const cdot_fns[] = {
7013 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
7014 };
7015 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7016 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7017
7018 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
7019 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7020 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7021 };
7022 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7023 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7024
/*
 * USDOT (vectors): feature test aa64_sve_i8mm.  A helper is supplied
 * only when a->esz == 2; for any other size NULL is passed.
 */
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
7027
/*
 * SVE2 crypto extensions.  All are instantiated with
 * TRANS_FEAT_NONSTREAMING and their own feature test.
 *
 * AESMC / AESIMC pass a->rd as both register arguments of
 * gen_gvec_ool_zz, i.e. the operation is performed in place.
 */
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, 0)
TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesimc, a->rd, a->rd, 0)

/* AESE / AESD: three-operand form, feature test aa64_sve2_aes. */
TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, 0)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aesd, a, 0)

/* SM4E / SM4EKEY: feature test aa64_sve2_sm4. */
TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

/* RAX1: feature test aa64_sve2_sha3; expanded through gen_gvec_rax1. */
TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)
7045
/*
 * SVE2 floating-point conversions.  Unless noted, each passes a data
 * value of 0 and the FPST_FPCR float status to gen_gvec_fpst_arg_zpz.
 */
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

/* BFCVTNT uses the aa64_sve_bf16 feature test rather than aa64_sve2. */
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

/*
 * FCVTX / FCVTXNT go through do_frint_mode with FPROUNDING_ODD and
 * otherwise use the same double-to-single helpers as FCVT / FCVTNT.
 */
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)
7063
7064 static gen_helper_gvec_3_ptr * const flogb_fns[] = {
7065 NULL, gen_helper_flogb_h,
7066 gen_helper_flogb_s, gen_helper_flogb_d
7067 };
7068 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
7069 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
7070
do_FMLAL_zzzw(DisasContext * s,arg_rrrr_esz * a,bool sub,bool sel)7071 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7072 {
7073 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7074 a->rd, a->rn, a->rm, a->ra,
7075 (sel << 1) | sub, tcg_env);
7076 }
7077
TRANS_FEAT(FMLALB_zzzw,aa64_sve2,do_FMLAL_zzzw,a,false,false)7078 TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
7079 TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
7080 TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
7081 TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
7082
7083 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7084 {
7085 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7086 a->rd, a->rn, a->rm, a->ra,
7087 (a->index << 2) | (sel << 1) | sub, tcg_env);
7088 }
7089
TRANS_FEAT(FMLALB_zzxw,aa64_sve2,do_FMLAL_zzxw,a,false,false)7090 TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
7091 TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
7092 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
7093 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
7094
/*
 * Integer matrix multiply-accumulate: feature test aa64_sve_i8mm,
 * instantiated with TRANS_FEAT_NONSTREAMING.
 */
TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

/* BFDOT, vector and indexed forms: feature test aa64_sve_bf16. */
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

/* BFMMLA: feature test aa64_sve_bf16, TRANS_FEAT_NONSTREAMING. */
TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)
7109
/*
 * Common expansion for BFMLALB / BFMLALT (vectors).
 *
 * SEL is passed to the helper as its data value (false for the B
 * form, true for the T form, per the instantiations below), and
 * FPST_FPCR is the float status used.  Returns whatever
 * gen_gvec_fpst_zzzz returns.
 */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
7118
7119 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7120 {
7121 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7122 a->rd, a->rn, a->rm, a->ra,
7123 (a->index << 1) | sel, FPST_FPCR);
7124 }
7125
TRANS_FEAT(BFMLALB_zzxw,aa64_sve_bf16,do_BFMLAL_zzxw,a,false)7126 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
7127 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
7128
7129 static bool trans_PSEL(DisasContext *s, arg_psel *a)
7130 {
7131 int vl = vec_full_reg_size(s);
7132 int pl = pred_gvec_reg_size(s);
7133 int elements = vl >> a->esz;
7134 TCGv_i64 tmp, didx, dbit;
7135 TCGv_ptr ptr;
7136
7137 if (!dc_isar_feature(aa64_sme, s)) {
7138 return false;
7139 }
7140 if (!sve_access_check(s)) {
7141 return true;
7142 }
7143
7144 tmp = tcg_temp_new_i64();
7145 dbit = tcg_temp_new_i64();
7146 didx = tcg_temp_new_i64();
7147 ptr = tcg_temp_new_ptr();
7148
7149 /* Compute the predicate element. */
7150 tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
7151 if (is_power_of_2(elements)) {
7152 tcg_gen_andi_i64(tmp, tmp, elements - 1);
7153 } else {
7154 tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
7155 }
7156
7157 /* Extract the predicate byte and bit indices. */
7158 tcg_gen_shli_i64(tmp, tmp, a->esz);
7159 tcg_gen_andi_i64(dbit, tmp, 7);
7160 tcg_gen_shri_i64(didx, tmp, 3);
7161 if (HOST_BIG_ENDIAN) {
7162 tcg_gen_xori_i64(didx, didx, 7);
7163 }
7164
7165 /* Load the predicate word. */
7166 tcg_gen_trunc_i64_ptr(ptr, didx);
7167 tcg_gen_add_ptr(ptr, ptr, tcg_env);
7168 tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
7169
7170 /* Extract the predicate bit and replicate to MO_64. */
7171 tcg_gen_shr_i64(tmp, tmp, dbit);
7172 tcg_gen_andi_i64(tmp, tmp, 1);
7173 tcg_gen_neg_i64(tmp, tmp);
7174
7175 /* Apply to either copy the source, or write zeros. */
7176 tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
7177 pred_full_reg_offset(s, a->pn), tmp, pl, pl);
7178 return true;
7179 }
7180
/*
 * Signed clamp, 32-bit scalar expansion: d = smin(smax(a, n), m).
 * A is the value being clamped, N the lower bound and M the upper
 * bound.
 */
static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

/* Signed clamp, 64-bit scalar expansion: d = smin(smax(a, n), m). */
static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

/*
 * Signed clamp, host-vector expansion for elements of size VECE:
 * d = smin(smax(a, n), m), element-wise.
 */
static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}
7199
gen_sclamp(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)7200 static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
7201 uint32_t a, uint32_t oprsz, uint32_t maxsz)
7202 {
7203 static const TCGOpcode vecop[] = {
7204 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
7205 };
7206 static const GVecGen4 ops[4] = {
7207 { .fniv = gen_sclamp_vec,
7208 .fno = gen_helper_gvec_sclamp_b,
7209 .opt_opc = vecop,
7210 .vece = MO_8 },
7211 { .fniv = gen_sclamp_vec,
7212 .fno = gen_helper_gvec_sclamp_h,
7213 .opt_opc = vecop,
7214 .vece = MO_16 },
7215 { .fni4 = gen_sclamp_i32,
7216 .fniv = gen_sclamp_vec,
7217 .fno = gen_helper_gvec_sclamp_s,
7218 .opt_opc = vecop,
7219 .vece = MO_32 },
7220 { .fni8 = gen_sclamp_i64,
7221 .fniv = gen_sclamp_vec,
7222 .fno = gen_helper_gvec_sclamp_d,
7223 .opt_opc = vecop,
7224 .vece = MO_64,
7225 .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
7226 };
7227 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
7228 }
7229
TRANS_FEAT(SCLAMP,aa64_sme,gen_gvec_fn_arg_zzzz,gen_sclamp,a)7230 TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
7231
/*
 * Unsigned clamp, 32-bit scalar expansion: d = umin(umax(a, n), m).
 * A is the value being clamped, N the lower bound and M the upper
 * bound.
 */
static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

/* Unsigned clamp, 64-bit scalar expansion: d = umin(umax(a, n), m). */
static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

/*
 * Unsigned clamp, host-vector expansion for elements of size VECE:
 * d = umin(umax(a, n), m), element-wise.
 */
static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}
7250
gen_uclamp(unsigned vece,uint32_t d,uint32_t n,uint32_t m,uint32_t a,uint32_t oprsz,uint32_t maxsz)7251 static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
7252 uint32_t a, uint32_t oprsz, uint32_t maxsz)
7253 {
7254 static const TCGOpcode vecop[] = {
7255 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
7256 };
7257 static const GVecGen4 ops[4] = {
7258 { .fniv = gen_uclamp_vec,
7259 .fno = gen_helper_gvec_uclamp_b,
7260 .opt_opc = vecop,
7261 .vece = MO_8 },
7262 { .fniv = gen_uclamp_vec,
7263 .fno = gen_helper_gvec_uclamp_h,
7264 .opt_opc = vecop,
7265 .vece = MO_16 },
7266 { .fni4 = gen_uclamp_i32,
7267 .fniv = gen_uclamp_vec,
7268 .fno = gen_helper_gvec_uclamp_s,
7269 .opt_opc = vecop,
7270 .vece = MO_32 },
7271 { .fni8 = gen_uclamp_i64,
7272 .fniv = gen_uclamp_vec,
7273 .fno = gen_helper_gvec_uclamp_d,
7274 .opt_opc = vecop,
7275 .vece = MO_64,
7276 .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
7277 };
7278 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
7279 }
7280
7281 TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
7282