xref: /qemu/target/s390x/tcg/translate_vx.c.inc (revision b2a3cbb8)
1/*
2 * QEMU TCG support -- s390x vector instruction translation functions
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 *   David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13/*
14 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
16 * real host vector instructions. As they only work up to 64 bit elements,
17 * 128 bit elements (vector is a single element) have to be handled
18 * differently. Operations that are too complicated to encode via TCG ops
19 * are handled via gvec ool (out-of-line) handlers.
20 *
21 * As soon as instructions use different element sizes for reads and writes
22 * or access elements "out of their element scope" we expand them manually
23 * in fancy loops, as gvec expansion does not deal with actual element
24 * numbers and does also not support access to other elements.
25 *
26 * 128 bit elements:
27 *  As we only have i32/i64, such elements have to be loaded into two
28 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
29 *
30 * Sizes:
31 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
32 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
33 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
34 *  128 bit element size has to be treated in a special way (MO_64 + 1).
35 *  We will use ES_* instead of MO_* for this reason in this file.
36 *
37 * CC handling:
38 *  As gvec ool-helpers can currently not return values (besides via
39 *  pointers like vectors or cpu_env), whenever we have to set the CC and
40 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
42 *  Whenever this is done, the helper writes globals (cc_op).
43 */
44
/* Number of bytes per element for a given element size (ES_8 .. ES_64). */
#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
/* Number of elements of the given size in one 128-bit vector register. */
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
/* Width of one element of the given size, in bits. */
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

/* Element sizes: identical to MO_8 .. MO_64, plus the special 128-bit case. */
#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

/* Floating-Point Format */
#define FPF_SHORT       2
#define FPF_LONG        3
#define FPF_EXT         4
59
60static inline bool valid_vec_element(uint8_t enr, MemOp es)
61{
62    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
63}
64
65static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
66                                 MemOp memop)
67{
68    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
69
70    switch ((unsigned)memop) {
71    case ES_8:
72        tcg_gen_ld8u_i64(dst, cpu_env, offs);
73        break;
74    case ES_16:
75        tcg_gen_ld16u_i64(dst, cpu_env, offs);
76        break;
77    case ES_32:
78        tcg_gen_ld32u_i64(dst, cpu_env, offs);
79        break;
80    case ES_8 | MO_SIGN:
81        tcg_gen_ld8s_i64(dst, cpu_env, offs);
82        break;
83    case ES_16 | MO_SIGN:
84        tcg_gen_ld16s_i64(dst, cpu_env, offs);
85        break;
86    case ES_32 | MO_SIGN:
87        tcg_gen_ld32s_i64(dst, cpu_env, offs);
88        break;
89    case ES_64:
90    case ES_64 | MO_SIGN:
91        tcg_gen_ld_i64(dst, cpu_env, offs);
92        break;
93    default:
94        g_assert_not_reached();
95    }
96}
97
98static void read_vec_element_i32(TCGv_i32 dst, uint8_t reg, uint8_t enr,
99                                 MemOp memop)
100{
101    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
102
103    switch (memop) {
104    case ES_8:
105        tcg_gen_ld8u_i32(dst, cpu_env, offs);
106        break;
107    case ES_16:
108        tcg_gen_ld16u_i32(dst, cpu_env, offs);
109        break;
110    case ES_8 | MO_SIGN:
111        tcg_gen_ld8s_i32(dst, cpu_env, offs);
112        break;
113    case ES_16 | MO_SIGN:
114        tcg_gen_ld16s_i32(dst, cpu_env, offs);
115        break;
116    case ES_32:
117    case ES_32 | MO_SIGN:
118        tcg_gen_ld_i32(dst, cpu_env, offs);
119        break;
120    default:
121        g_assert_not_reached();
122    }
123}
124
125static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
126                                  MemOp memop)
127{
128    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
129
130    switch (memop) {
131    case ES_8:
132        tcg_gen_st8_i64(src, cpu_env, offs);
133        break;
134    case ES_16:
135        tcg_gen_st16_i64(src, cpu_env, offs);
136        break;
137    case ES_32:
138        tcg_gen_st32_i64(src, cpu_env, offs);
139        break;
140    case ES_64:
141        tcg_gen_st_i64(src, cpu_env, offs);
142        break;
143    default:
144        g_assert_not_reached();
145    }
146}
147
148static void write_vec_element_i32(TCGv_i32 src, int reg, uint8_t enr,
149                                  MemOp memop)
150{
151    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
152
153    switch (memop) {
154    case ES_8:
155        tcg_gen_st8_i32(src, cpu_env, offs);
156        break;
157    case ES_16:
158        tcg_gen_st16_i32(src, cpu_env, offs);
159        break;
160    case ES_32:
161        tcg_gen_st_i32(src, cpu_env, offs);
162        break;
163    default:
164        g_assert_not_reached();
165    }
166}
167
/*
 * Compute a host pointer (@ptr) to the vector element selected at runtime
 * by the value in @enr, for register @reg and element size @es.  Invalid
 * element numbers are masked, matching what the instructions specify.
 */
static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (see vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#if !HOST_BIG_ENDIAN
    /*
     * NOTE(review): presumably this mirrors vec_reg_offset()'s handling of
     * sub-doubleword offsets on little-endian hosts — confirm against
     * vec_reg_offset().
     */
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}
189
/*
 * Convenience wrappers around the generic gvec expanders: on s390x both the
 * operand size (oprsz) and the maximum size (maxsz) are always 16 bytes.
 */
#define gen_gvec_2(v1, v2, gen) \
    tcg_gen_gvec_2(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   16, 16, gen)
#define gen_gvec_2s(v1, v2, c, gen) \
    tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                    16, 16, c, gen)
#define gen_gvec_2_ool(v1, v2, data, fn) \
    tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       16, 16, data, fn)
#define gen_gvec_2i_ool(v1, v2, c, data, fn) \
    tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                        c, 16, 16, data, fn)
#define gen_gvec_2_ptr(v1, v2, ptr, data, fn) \
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       ptr, 16, 16, data, fn)
#define gen_gvec_3(v1, v2, v3, gen) \
    tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), 16, 16, gen)
#define gen_gvec_3_ool(v1, v2, v3, data, fn) \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_3i(v1, v2, v3, c, gen) \
    tcg_gen_gvec_3i(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                    vec_full_reg_offset(v3), 16, 16, c, gen)
#define gen_gvec_4(v1, v2, v3, v4, gen) \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       16, 16, data, fn)
#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                       ptr, 16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c) \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2) \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
/* Note: no trailing semicolon — the call sites provide their own. */
#define gen_gvec_dup_imm(es, v1, c) \
    tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_fn_2(fn, es, v1, v2) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      16, 16)
#define gen_gvec_fn_2i(fn, es, v1, v2, c) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      c, 16, 16)
#define gen_gvec_fn_2s(fn, es, v1, v2, s) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      s, 16, 16)
#define gen_gvec_fn_3(fn, es, v1, v2, v3) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), 16, 16)
#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                      vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
251
252/*
253 * Helper to carry out a 128 bit vector computation using 2 i64 values per
254 * vector.
255 */
256typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
257                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
258static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a,
259                              uint8_t b)
260{
261        TCGv_i64 dh = tcg_temp_new_i64();
262        TCGv_i64 dl = tcg_temp_new_i64();
263        TCGv_i64 ah = tcg_temp_new_i64();
264        TCGv_i64 al = tcg_temp_new_i64();
265        TCGv_i64 bh = tcg_temp_new_i64();
266        TCGv_i64 bl = tcg_temp_new_i64();
267
268        read_vec_element_i64(ah, a, 0, ES_64);
269        read_vec_element_i64(al, a, 1, ES_64);
270        read_vec_element_i64(bh, b, 0, ES_64);
271        read_vec_element_i64(bl, b, 1, ES_64);
272        fn(dl, dh, al, ah, bl, bh);
273        write_vec_element_i64(dh, d, 0, ES_64);
274        write_vec_element_i64(dl, d, 1, ES_64);
275
276        tcg_temp_free_i64(dh);
277        tcg_temp_free_i64(dl);
278        tcg_temp_free_i64(ah);
279        tcg_temp_free_i64(al);
280        tcg_temp_free_i64(bh);
281        tcg_temp_free_i64(bl);
282}
283
284typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
285                                     TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh,
286                                     TCGv_i64 cl, TCGv_i64 ch);
287static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
288                              uint8_t b, uint8_t c)
289{
290        TCGv_i64 dh = tcg_temp_new_i64();
291        TCGv_i64 dl = tcg_temp_new_i64();
292        TCGv_i64 ah = tcg_temp_new_i64();
293        TCGv_i64 al = tcg_temp_new_i64();
294        TCGv_i64 bh = tcg_temp_new_i64();
295        TCGv_i64 bl = tcg_temp_new_i64();
296        TCGv_i64 ch = tcg_temp_new_i64();
297        TCGv_i64 cl = tcg_temp_new_i64();
298
299        read_vec_element_i64(ah, a, 0, ES_64);
300        read_vec_element_i64(al, a, 1, ES_64);
301        read_vec_element_i64(bh, b, 0, ES_64);
302        read_vec_element_i64(bl, b, 1, ES_64);
303        read_vec_element_i64(ch, c, 0, ES_64);
304        read_vec_element_i64(cl, c, 1, ES_64);
305        fn(dl, dh, al, ah, bl, bh, cl, ch);
306        write_vec_element_i64(dh, d, 0, ES_64);
307        write_vec_element_i64(dl, d, 1, ES_64);
308
309        tcg_temp_free_i64(dh);
310        tcg_temp_free_i64(dl);
311        tcg_temp_free_i64(ah);
312        tcg_temp_free_i64(al);
313        tcg_temp_free_i64(bh);
314        tcg_temp_free_i64(bl);
315        tcg_temp_free_i64(ch);
316        tcg_temp_free_i64(cl);
317}
318
319static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
320                          uint64_t b)
321{
322    TCGv_i64 bl = tcg_const_i64(b);
323    TCGv_i64 bh = tcg_const_i64(0);
324
325    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
326    tcg_temp_free_i64(bl);
327    tcg_temp_free_i64(bh);
328}
329
330static DisasJumpType op_vbperm(DisasContext *s, DisasOps *o)
331{
332    gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), get_field(s, v3), 0,
333                   gen_helper_gvec_vbperm);
334
335    return DISAS_NEXT;
336}
337
/*
 * VECTOR GATHER ELEMENT: load one element from the address formed by
 * adding element @enr of v2 to the base/displacement address.
 */
static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s, m3);
    TCGv_i64 tmp;

    /* the element number must exist for the given element size */
    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    /* add the vector index to the base/displacement address ... */
    read_vec_element_i64(tmp, get_field(s, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    /* ... and wrap the result according to the addressing mode */
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
359
/*
 * Expand an 8-bit mask into a 64-bit value: bit i of @mask decides whether
 * byte i (counting from the least significant byte) becomes 0xff or 0x00.
 */
static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 7; i >= 0; i--) {
        r <<= 8;
        if (mask & (1u << i)) {
            r |= 0xff;
        }
    }
    return r;
}
372
373static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
374{
375    const uint16_t i2 = get_field(s, i2);
376
377    if (i2 == (i2 & 0xff) * 0x0101) {
378        /*
379         * Masks for both 64 bit elements of the vector are the same.
380         * Trust tcg to produce a good constant loading.
381         */
382        gen_gvec_dup_imm(ES_64, get_field(s, v1),
383                         generate_byte_mask(i2 & 0xff));
384    } else {
385        TCGv_i64 t = tcg_temp_new_i64();
386
387        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
388        write_vec_element_i64(t, get_field(s, v1), 0, ES_64);
389        tcg_gen_movi_i64(t, generate_byte_mask(i2));
390        write_vec_element_i64(t, get_field(s, v1), 1, ES_64);
391        tcg_temp_free_i64(t);
392    }
393    return DISAS_NEXT;
394}
395
396static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
397{
398    const uint8_t es = get_field(s, m4);
399    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
400    const uint8_t i2 = get_field(s, i2) & (bits - 1);
401    const uint8_t i3 = get_field(s, i3) & (bits - 1);
402    uint64_t mask = 0;
403    int i;
404
405    if (es > ES_64) {
406        gen_program_exception(s, PGM_SPECIFICATION);
407        return DISAS_NORETURN;
408    }
409
410    /* generate the mask - take care of wrapping */
411    for (i = i2; ; i = (i + 1) % bits) {
412        mask |= 1ull << (bits - i - 1);
413        if (i == i3) {
414            break;
415        }
416    }
417
418    gen_gvec_dup_imm(es, get_field(s, v1), mask);
419    return DISAS_NEXT;
420}
421
422static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
423{
424    TCGv_i64 t0 = tcg_temp_new_i64();
425    TCGv_i64 t1 = tcg_temp_new_i64();
426
427    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
428    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
429    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
430    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
431    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
432    tcg_temp_free(t0);
433    tcg_temp_free(t1);
434    return DISAS_NEXT;
435}
436
437static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
438{
439    gen_gvec_mov(get_field(s, v1), get_field(s, v2));
440    return DISAS_NEXT;
441}
442
443static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
444{
445    const uint8_t es = get_field(s, m3);
446    TCGv_i64 tmp;
447
448    if (es > ES_64) {
449        gen_program_exception(s, PGM_SPECIFICATION);
450        return DISAS_NORETURN;
451    }
452
453    tmp = tcg_temp_new_i64();
454    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
455    gen_gvec_dup_i64(es, get_field(s, v1), tmp);
456    tcg_temp_free_i64(tmp);
457    return DISAS_NEXT;
458}
459
460static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o)
461{
462    const uint8_t es = s->insn->data;
463    const uint8_t enr = get_field(s, m3);
464    TCGv_i64 tmp;
465
466    if (!valid_vec_element(enr, es)) {
467        gen_program_exception(s, PGM_SPECIFICATION);
468        return DISAS_NORETURN;
469    }
470
471    tmp = tcg_temp_new_i64();
472    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
473    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
474    tcg_temp_free_i64(tmp);
475    return DISAS_NEXT;
476}
477
478static DisasJumpType op_vlbrrep(DisasContext *s, DisasOps *o)
479{
480    const uint8_t es = get_field(s, m3);
481    TCGv_i64 tmp;
482
483    if (es < ES_16 || es > ES_64) {
484        gen_program_exception(s, PGM_SPECIFICATION);
485        return DISAS_NORETURN;
486    }
487
488    tmp = tcg_temp_new_i64();
489    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
490    gen_gvec_dup_i64(es, get_field(s, v1), tmp);
491    tcg_temp_free_i64(tmp);
492    return DISAS_NEXT;
493}
494
/*
 * VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO: load a single byte-reversed
 * element into the leftmost doubleword of v1 and zero everything else.
 */
static DisasJumpType op_vllebrz(DisasContext *s, DisasOps *o)
{
    const uint8_t m3 = get_field(s, m3);
    TCGv_i64 tmp;
    int es, lshift;

    switch (m3) {
    case ES_16:
    case ES_32:
    case ES_64:
        /* element ends up right-aligned in the leftmost doubleword */
        es = m3;
        lshift = 0;
        break;
    case 6:
        /* m3 == 6: 32-bit element, left-aligned within the doubleword */
        es = ES_32;
        lshift = 32;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    /* little-endian load produces the byte-reversed element */
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
    tcg_gen_shli_i64(tmp, tmp, lshift);

    /* doubleword 0 receives the element, doubleword 1 is zeroed */
    write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
    write_vec_element_i64(tcg_constant_i64(0), get_field(s, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
526
527static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
528{
529    const uint8_t es = get_field(s, m3);
530    TCGv_i64 t0, t1;
531
532    if (es < ES_16 || es > ES_128) {
533        gen_program_exception(s, PGM_SPECIFICATION);
534        return DISAS_NORETURN;
535    }
536
537    t0 = tcg_temp_new_i64();
538    t1 = tcg_temp_new_i64();
539
540
541    if (es == ES_128) {
542        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
543        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
544        tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
545        goto write;
546    }
547
548    /* Begin with byte reversed doublewords... */
549    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
550    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
551    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
552
553    /*
554     * For 16 and 32-bit elements, the doubleword bswap also reversed
555     * the order of the elements.  Perform a larger order swap to put
556     * them back into place.  For the 128-bit "element", finish the
557     * bswap by swapping the doublewords.
558     */
559    switch (es) {
560    case ES_16:
561        tcg_gen_hswap_i64(t0, t0);
562        tcg_gen_hswap_i64(t1, t1);
563        break;
564    case ES_32:
565        tcg_gen_wswap_i64(t0, t0);
566        tcg_gen_wswap_i64(t1, t1);
567        break;
568    case ES_64:
569        break;
570    default:
571        g_assert_not_reached();
572    }
573
574write:
575    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
576    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
577
578    tcg_temp_free(t0);
579    tcg_temp_free(t1);
580    return DISAS_NEXT;
581}
582
583static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
584{
585    const uint8_t es = s->insn->data;
586    const uint8_t enr = get_field(s, m3);
587    TCGv_i64 tmp;
588
589    if (!valid_vec_element(enr, es)) {
590        gen_program_exception(s, PGM_SPECIFICATION);
591        return DISAS_NORETURN;
592    }
593
594    tmp = tcg_temp_new_i64();
595    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
596    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
597    tcg_temp_free_i64(tmp);
598    return DISAS_NEXT;
599}
600
601static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
602{
603    const uint8_t es = s->insn->data;
604    const uint8_t enr = get_field(s, m3);
605    TCGv_i64 tmp;
606
607    if (!valid_vec_element(enr, es)) {
608        gen_program_exception(s, PGM_SPECIFICATION);
609        return DISAS_NORETURN;
610    }
611
612    tmp = tcg_const_i64((int16_t)get_field(s, i2));
613    write_vec_element_i64(tmp, get_field(s, v1), enr, es);
614    tcg_temp_free_i64(tmp);
615    return DISAS_NEXT;
616}
617
618static DisasJumpType op_vler(DisasContext *s, DisasOps *o)
619{
620    const uint8_t es = get_field(s, m3);
621
622    if (es < ES_16 || es > ES_64) {
623        gen_program_exception(s, PGM_SPECIFICATION);
624        return DISAS_NORETURN;
625    }
626
627    TCGv_i64 t0 = tcg_temp_new_i64();
628    TCGv_i64 t1 = tcg_temp_new_i64();
629
630    /* Begin with the two doublewords swapped... */
631    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
632    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
633    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
634
635    /* ... then swap smaller elements within the doublewords as required. */
636    switch (es) {
637    case MO_16:
638        tcg_gen_hswap_i64(t1, t1);
639        tcg_gen_hswap_i64(t0, t0);
640        break;
641    case MO_32:
642        tcg_gen_wswap_i64(t1, t1);
643        tcg_gen_wswap_i64(t0, t0);
644        break;
645    case MO_64:
646        break;
647    default:
648        g_assert_not_reached();
649    }
650
651    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
652    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
653    tcg_temp_free(t0);
654    tcg_temp_free(t1);
655    return DISAS_NEXT;
656}
657
/*
 * VECTOR LOAD GR FROM VR ELEMENT: read the element selected by (b2) + d2
 * of v3 into a GPR, zero-extended.
 */
static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s, b2)) {
        uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s, v3), enr, es);
        return DISAS_NEXT;
    }

    /* element number only known at runtime -> load through a host pointer */
    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}
698
/*
 * VECTOR LOAD LOGICAL ELEMENT AND ZERO: zero v1, then load a single
 * element into the leftmost doubleword.
 */
static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        /* m3 == 6 is only valid with the vector-enhancements facility */
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    /* zero the whole register first, then insert the loaded element */
    gen_gvec_dup_imm(es, get_field(s, v1), 0);
    write_vec_element_i64(t, get_field(s, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}
739
/*
 * VECTOR LOAD MULTIPLE: load registers v1 through v3 (16 bytes each) from
 * consecutive storage locations.
 */
static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s, v3);
    uint8_t v1 = get_field(s, v1);
    TCGv_i64 t0, t1;

    /* at most 16 registers; v3 must not be numerically below v1 */
    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element first; the first element is then checked by the first load
     * of the loop below, before any register has been modified.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEUQ);

    /* load two doublewords per register, bumping v1 as we go */
    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}
779
/*
 * VECTOR LOAD TO BLOCK BOUNDARY: load bytes into v1 without crossing the
 * block boundary selected by m3 (block size is 64 << m3 bytes); the
 * actual byte handling is done by the vll helper.
 */
static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    /*
     * addr | -block_size sets all bits above the in-block offset, so
     * negating yields block_size - (addr % block_size) -- block_size is
     * a power of two.
     */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}
804
805static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
806{
807    const uint8_t es = get_field(s, m4);
808    TCGv_ptr ptr;
809
810    if (es > ES_64) {
811        gen_program_exception(s, PGM_SPECIFICATION);
812        return DISAS_NORETURN;
813    }
814
815    /* fast path if we don't need the register content */
816    if (!get_field(s, b2)) {
817        uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1);
818
819        write_vec_element_i64(o->in2, get_field(s, v1), enr, es);
820        return DISAS_NEXT;
821    }
822
823    ptr = tcg_temp_new_ptr();
824    get_vec_element_ptr_i64(ptr, get_field(s, v1), o->addr1, es);
825    switch (es) {
826    case ES_8:
827        tcg_gen_st8_i64(o->in2, ptr, 0);
828        break;
829    case ES_16:
830        tcg_gen_st16_i64(o->in2, ptr, 0);
831        break;
832    case ES_32:
833        tcg_gen_st32_i64(o->in2, ptr, 0);
834        break;
835    case ES_64:
836        tcg_gen_st_i64(o->in2, ptr, 0);
837        break;
838    default:
839        g_assert_not_reached();
840    }
841    tcg_temp_free_ptr(ptr);
842
843    return DISAS_NEXT;
844}
845
846static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
847{
848    write_vec_element_i64(o->in1, get_field(s, v1), 0, ES_64);
849    write_vec_element_i64(o->in2, get_field(s, v1), 1, ES_64);
850    return DISAS_NEXT;
851}
852
/*
 * VECTOR LOAD WITH LENGTH: load the bytes selected by the length in a GPR
 * into v1, via the vll helper.
 */
static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length (clobbers o->in2) */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}
865
/*
 * VECTOR MERGE HIGH (op2 0x61) / VECTOR MERGE LOW: interleave the elements
 * of v2 and v3 into v1, taking them from the leftmost halves (merge high)
 * or the rightmost halves (merge low) of the sources.
 */
static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s, v1);
    const uint8_t v2 = get_field(s, v2);
    const uint8_t v3 = get_field(s, v3);
    const uint8_t es = get_field(s, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields.op2 == 0x61) {
        /* MERGE HIGH: src_idx < N/2, i.e. the leftmost source halves */
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            /* even destination elements come from v2, odd ones from v3 */
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* MERGE LOW: src_idx >= N/2, i.e. the rightmost source halves */
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
907
908static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
909{
910    const uint8_t v1 = get_field(s, v1);
911    const uint8_t v2 = get_field(s, v2);
912    const uint8_t v3 = get_field(s, v3);
913    const uint8_t es = get_field(s, m4);
914    static gen_helper_gvec_3 * const vpk[3] = {
915        gen_helper_gvec_vpk16,
916        gen_helper_gvec_vpk32,
917        gen_helper_gvec_vpk64,
918    };
919     static gen_helper_gvec_3 * const vpks[3] = {
920        gen_helper_gvec_vpks16,
921        gen_helper_gvec_vpks32,
922        gen_helper_gvec_vpks64,
923    };
924    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
925        gen_helper_gvec_vpks_cc16,
926        gen_helper_gvec_vpks_cc32,
927        gen_helper_gvec_vpks_cc64,
928    };
929    static gen_helper_gvec_3 * const vpkls[3] = {
930        gen_helper_gvec_vpkls16,
931        gen_helper_gvec_vpkls32,
932        gen_helper_gvec_vpkls64,
933    };
934    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
935        gen_helper_gvec_vpkls_cc16,
936        gen_helper_gvec_vpkls_cc32,
937        gen_helper_gvec_vpkls_cc64,
938    };
939
940    if (es == ES_8 || es > ES_64) {
941        gen_program_exception(s, PGM_SPECIFICATION);
942        return DISAS_NORETURN;
943    }
944
945    switch (s->fields.op2) {
946    case 0x97:
947        if (get_field(s, m5) & 0x1) {
948            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
949            set_cc_static(s);
950        } else {
951            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
952        }
953        break;
954    case 0x95:
955        if (get_field(s, m5) & 0x1) {
956            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
957            set_cc_static(s);
958        } else {
959            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
960        }
961        break;
962    case 0x94:
963        /* If sources and destination dont't overlap -> fast path */
964        if (v1 != v2 && v1 != v3) {
965            const uint8_t src_es = get_field(s, m4);
966            const uint8_t dst_es = src_es - 1;
967            TCGv_i64 tmp = tcg_temp_new_i64();
968            int dst_idx, src_idx;
969
970            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
971                src_idx = dst_idx;
972                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
973                    read_vec_element_i64(tmp, v2, src_idx, src_es);
974                } else {
975                    src_idx -= NUM_VEC_ELEMENTS(src_es);
976                    read_vec_element_i64(tmp, v3, src_idx, src_es);
977                }
978                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
979            }
980            tcg_temp_free_i64(tmp);
981        } else {
982            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
983        }
984        break;
985    default:
986        g_assert_not_reached();
987    }
988    return DISAS_NEXT;
989}
990
991static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
992{
993    gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
994                   get_field(s, v3), get_field(s, v4),
995                   0, gen_helper_gvec_vperm);
996    return DISAS_NEXT;
997}
998
999static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
1000{
1001    const uint8_t i2 = extract32(get_field(s, m4), 2, 1);
1002    const uint8_t i3 = extract32(get_field(s, m4), 0, 1);
1003    TCGv_i64 t0 = tcg_temp_new_i64();
1004    TCGv_i64 t1 = tcg_temp_new_i64();
1005
1006    read_vec_element_i64(t0, get_field(s, v2), i2, ES_64);
1007    read_vec_element_i64(t1, get_field(s, v3), i3, ES_64);
1008    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
1009    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
1010    tcg_temp_free_i64(t0);
1011    tcg_temp_free_i64(t1);
1012    return DISAS_NEXT;
1013}
1014
static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    /* Replicate element enr of v3 into every element of v1. */
    const uint8_t enr = get_field(s, i2);
    const uint8_t es = get_field(s, m4);

    /* Element size and element number must designate an existing element. */
    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* gvec can broadcast directly from the source element's slot in memory. */
    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s, v1)),
                         vec_reg_offset(get_field(s, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}
1030
static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    /* Replicate the sign-extended 16-bit immediate into all elements of v1. */
    const int64_t data = (int16_t)get_field(s, i2);
    const uint8_t es = get_field(s, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dup_imm(es, get_field(s, v1), data);
    return DISAS_NEXT;
}
1044
1045static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
1046{
1047    const uint8_t es = s->insn->data;
1048    const uint8_t enr = get_field(s, m3);
1049    TCGv_i64 tmp;
1050
1051    if (!valid_vec_element(enr, es)) {
1052        gen_program_exception(s, PGM_SPECIFICATION);
1053        return DISAS_NORETURN;
1054    }
1055
1056    tmp = tcg_temp_new_i64();
1057    read_vec_element_i64(tmp, get_field(s, v2), enr, es);
1058    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
1059    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);
1060
1061    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1062    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
1063    tcg_temp_free_i64(tmp);
1064    return DISAS_NEXT;
1065}
1066
1067static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
1068{
1069    gen_gvec_fn_4(bitsel, ES_8, get_field(s, v1),
1070                  get_field(s, v4), get_field(s, v2),
1071                  get_field(s, v3));
1072    return DISAS_NEXT;
1073}
1074
1075static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
1076{
1077    const uint8_t es = get_field(s, m3);
1078    int idx1, idx2;
1079    TCGv_i64 tmp;
1080
1081    switch (es) {
1082    case ES_8:
1083        idx1 = 7;
1084        idx2 = 15;
1085        break;
1086    case ES_16:
1087        idx1 = 3;
1088        idx2 = 7;
1089        break;
1090    case ES_32:
1091        idx1 = 1;
1092        idx2 = 3;
1093        break;
1094    default:
1095        gen_program_exception(s, PGM_SPECIFICATION);
1096        return DISAS_NORETURN;
1097    }
1098
1099    tmp = tcg_temp_new_i64();
1100    read_vec_element_i64(tmp, get_field(s, v2), idx1, es | MO_SIGN);
1101    write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64);
1102    read_vec_element_i64(tmp, get_field(s, v2), idx2, es | MO_SIGN);
1103    write_vec_element_i64(tmp, get_field(s, v1), 1, ES_64);
1104    tcg_temp_free_i64(tmp);
1105    return DISAS_NEXT;
1106}
1107
1108static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
1109{
1110    TCGv_i64 tmp = tcg_const_i64(16);
1111
1112    /* Probe write access before actually modifying memory */
1113    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);
1114
1115    read_vec_element_i64(tmp,  get_field(s, v1), 0, ES_64);
1116    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1117    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1118    read_vec_element_i64(tmp,  get_field(s, v1), 1, ES_64);
1119    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1120    tcg_temp_free_i64(tmp);
1121    return DISAS_NEXT;
1122}
1123
1124static DisasJumpType op_vstebr(DisasContext *s, DisasOps *o)
1125{
1126    const uint8_t es = s->insn->data;
1127    const uint8_t enr = get_field(s, m3);
1128    TCGv_i64 tmp;
1129
1130    if (!valid_vec_element(enr, es)) {
1131        gen_program_exception(s, PGM_SPECIFICATION);
1132        return DISAS_NORETURN;
1133    }
1134
1135    tmp = tcg_temp_new_i64();
1136    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1137    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es);
1138    tcg_temp_free_i64(tmp);
1139    return DISAS_NEXT;
1140}
1141
1142static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
1143{
1144    const uint8_t es = get_field(s, m3);
1145    TCGv_i64 t0, t1;
1146
1147    if (es < ES_16 || es > ES_128) {
1148        gen_program_exception(s, PGM_SPECIFICATION);
1149        return DISAS_NORETURN;
1150    }
1151
1152    /* Probe write access before actually modifying memory */
1153    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
1154
1155    t0 = tcg_temp_new_i64();
1156    t1 = tcg_temp_new_i64();
1157
1158
1159    if (es == ES_128) {
1160        read_vec_element_i64(t1, get_field(s, v1), 0, ES_64);
1161        read_vec_element_i64(t0, get_field(s, v1), 1, ES_64);
1162        goto write;
1163    }
1164
1165    read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
1166    read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
1167
1168    /*
1169     * For 16 and 32-bit elements, the doubleword bswap below will
1170     * reverse the order of the elements.  Perform a larger order
1171     * swap to put them back into place.  For the 128-bit "element",
1172     * finish the bswap by swapping the doublewords.
1173     */
1174    switch (es) {
1175    case MO_16:
1176        tcg_gen_hswap_i64(t0, t0);
1177        tcg_gen_hswap_i64(t1, t1);
1178        break;
1179    case MO_32:
1180        tcg_gen_wswap_i64(t0, t0);
1181        tcg_gen_wswap_i64(t1, t1);
1182        break;
1183    case MO_64:
1184        break;
1185    default:
1186        g_assert_not_reached();
1187    }
1188
1189write:
1190    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
1191    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1192    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
1193
1194    tcg_temp_free(t0);
1195    tcg_temp_free(t1);
1196    return DISAS_NEXT;
1197}
1198
1199static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
1200{
1201    const uint8_t es = s->insn->data;
1202    const uint8_t enr = get_field(s, m3);
1203    TCGv_i64 tmp;
1204
1205    if (!valid_vec_element(enr, es)) {
1206        gen_program_exception(s, PGM_SPECIFICATION);
1207        return DISAS_NORETURN;
1208    }
1209
1210    tmp = tcg_temp_new_i64();
1211    read_vec_element_i64(tmp, get_field(s, v1), enr, es);
1212    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
1213    tcg_temp_free_i64(tmp);
1214    return DISAS_NEXT;
1215}
1216
1217static DisasJumpType op_vster(DisasContext *s, DisasOps *o)
1218{
1219    const uint8_t es = get_field(s, m3);
1220    TCGv_i64 t0, t1;
1221
1222    if (es < ES_16 || es > ES_64) {
1223        gen_program_exception(s, PGM_SPECIFICATION);
1224        return DISAS_NORETURN;
1225    }
1226
1227    /* Probe write access before actually modifying memory */
1228    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
1229
1230    /* Begin with the two doublewords swapped... */
1231    t0 = tcg_temp_new_i64();
1232    t1 = tcg_temp_new_i64();
1233    read_vec_element_i64(t1,  get_field(s, v1), 0, ES_64);
1234    read_vec_element_i64(t0,  get_field(s, v1), 1, ES_64);
1235
1236    /* ... then swap smaller elements within the doublewords as required. */
1237    switch (es) {
1238    case MO_16:
1239        tcg_gen_hswap_i64(t1, t1);
1240        tcg_gen_hswap_i64(t0, t0);
1241        break;
1242    case MO_32:
1243        tcg_gen_wswap_i64(t1, t1);
1244        tcg_gen_wswap_i64(t0, t0);
1245        break;
1246    case MO_64:
1247        break;
1248    default:
1249        g_assert_not_reached();
1250    }
1251
1252    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ);
1253    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1254    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ);
1255
1256    tcg_temp_free(t0);
1257    tcg_temp_free(t1);
1258    return DISAS_NEXT;
1259}
1260
1261static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
1262{
1263    const uint8_t v3 = get_field(s, v3);
1264    uint8_t v1 = get_field(s, v1);
1265    TCGv_i64 tmp;
1266
1267    while (v3 < v1 || (v3 - v1 + 1) > 16) {
1268        gen_program_exception(s, PGM_SPECIFICATION);
1269        return DISAS_NORETURN;
1270    }
1271
1272    /* Probe write access before actually modifying memory */
1273    tmp = tcg_const_i64((v3 - v1 + 1) * 16);
1274    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);
1275
1276    for (;; v1++) {
1277        read_vec_element_i64(tmp, v1, 0, ES_64);
1278        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1279        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1280        read_vec_element_i64(tmp, v1, 1, ES_64);
1281        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEUQ);
1282        if (v1 == v3) {
1283            break;
1284        }
1285        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
1286    }
1287    tcg_temp_free_i64(tmp);
1288    return DISAS_NEXT;
1289}
1290
1291static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
1292{
1293    const int v1_offs = vec_full_reg_offset(get_field(s, v1));
1294    TCGv_ptr a0 = tcg_temp_new_ptr();
1295
1296    /* convert highest index into an actual length */
1297    tcg_gen_addi_i64(o->in2, o->in2, 1);
1298    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
1299    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
1300    tcg_temp_free_ptr(a0);
1301    return DISAS_NEXT;
1302}
1303
static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
{
    /*
     * Vector unpack: widen source elements of v2 to twice their size in v1.
     * Opcodes 0xd4 and 0xd5 are the logical (zero-extending) variants;
     * the others sign-extend. v1 may equal v2, hence the careful choice
     * of iteration direction below.
     */
    const bool logical = s->fields.op2 == 0xd4 || s->fields.op2 == 0xd5;
    const uint8_t v1 = get_field(s, v1);
    const uint8_t v2 = get_field(s, v2);
    const uint8_t src_es = get_field(s, m3);
    const uint8_t dst_es = src_es + 1;
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    /* Source elements can be at most 32 bit (destination at most 64 bit). */
    if (src_es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields.op2 == 0xd7 || s->fields.op2 == 0xd5) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }

    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
1341
1342static DisasJumpType op_va(DisasContext *s, DisasOps *o)
1343{
1344    const uint8_t es = get_field(s, m4);
1345
1346    if (es > ES_128) {
1347        gen_program_exception(s, PGM_SPECIFICATION);
1348        return DISAS_NORETURN;
1349    } else if (es == ES_128) {
1350        gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s, v1),
1351                          get_field(s, v2), get_field(s, v3));
1352        return DISAS_NEXT;
1353    }
1354    gen_gvec_fn_3(add, es, get_field(s, v1), get_field(s, v2),
1355                  get_field(s, v3));
1356    return DISAS_NEXT;
1357}
1358
/*
 * Compute, for every element of size @es packed into the 64-bit lanes
 * @a and @b, the carry-out of the element-wise addition a + b (0 or 1
 * per element), without letting carries cross element boundaries.
 */
static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es)
{
    const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1;
    TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr));
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();
    TCGv_i64 t3 = tcg_temp_new_i64();

    /* Calculate the carry into the MSB, ignoring the old MSBs */
    tcg_gen_andc_i64(t1, a, msb_mask);
    tcg_gen_andc_i64(t2, b, msb_mask);
    tcg_gen_add_i64(t1, t1, t2);
    /* Calculate the MSB without any carry into it */
    tcg_gen_xor_i64(t3, a, b);
    /* Calculate the carry out of the MSB in the MSB bit position */
    tcg_gen_and_i64(d, a, b);
    tcg_gen_and_i64(t1, t1, t3);
    tcg_gen_or_i64(d, d, t1);
    /* Isolate and shift the carry into position */
    tcg_gen_and_i64(d, d, msb_mask);
    tcg_gen_shri_i64(d, d, msb_bit_nr);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
    tcg_temp_free_i64(t3);
}
1385
/* Per-element carry-out for 8-bit elements on a full 64-bit lane. */
static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_8);
}
1390
/* Per-element carry-out for 16-bit elements on a full 64-bit lane. */
static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    gen_acc(d, a, b, ES_16);
}
1395
1396static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1397{
1398    TCGv_i32 t = tcg_temp_new_i32();
1399
1400    tcg_gen_add_i32(t, a, b);
1401    tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b);
1402    tcg_temp_free_i32(t);
1403}
1404
1405static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1406{
1407    TCGv_i64 t = tcg_temp_new_i64();
1408
1409    tcg_gen_add_i64(t, a, b);
1410    tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b);
1411    tcg_temp_free_i64(t);
1412}
1413
/* 128-bit carry-out: dl = carry of the 128-bit addition a + b, dh = 0. */
static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                         TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_const_i64(0);

    /* th = carry of al + bl */
    tcg_gen_add2_i64(tl, th, al, zero, bl, zero);
    /* tl = ah + carry(al + bl), th = carry of that sum */
    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
    /* dl = accumulated carries out of ah + bh + carry(al + bl) */
    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(zero);
}
1430
1431static DisasJumpType op_vacc(DisasContext *s, DisasOps *o)
1432{
1433    const uint8_t es = get_field(s, m4);
1434    static const GVecGen3 g[4] = {
1435        { .fni8 = gen_acc8_i64, },
1436        { .fni8 = gen_acc16_i64, },
1437        { .fni4 = gen_acc_i32, },
1438        { .fni8 = gen_acc_i64, },
1439    };
1440
1441    if (es > ES_128) {
1442        gen_program_exception(s, PGM_SPECIFICATION);
1443        return DISAS_NORETURN;
1444    } else if (es == ES_128) {
1445        gen_gvec128_3_i64(gen_acc2_i64, get_field(s, v1),
1446                          get_field(s, v2), get_field(s, v3));
1447        return DISAS_NEXT;
1448    }
1449    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1450               get_field(s, v3), &g[es]);
1451    return DISAS_NEXT;
1452}
1453
/* 128-bit add with carry: d = a + b + (bit 0 of cl); ch is unused. */
static void gen_ac2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                        TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 th = tcg_const_i64(0);

    /* extract the carry only */
    tcg_gen_extract_i64(tl, cl, 0, 1);
    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
    /* add the carry-in as a 128-bit value {0:tl} */
    tcg_gen_add2_i64(dl, dh, dl, dh, tl, th);

    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(th);
}
1468
1469static DisasJumpType op_vac(DisasContext *s, DisasOps *o)
1470{
1471    if (get_field(s, m5) != ES_128) {
1472        gen_program_exception(s, PGM_SPECIFICATION);
1473        return DISAS_NORETURN;
1474    }
1475
1476    gen_gvec128_4_i64(gen_ac2_i64, get_field(s, v1),
1477                      get_field(s, v2), get_field(s, v3),
1478                      get_field(s, v4));
1479    return DISAS_NEXT;
1480}
1481
/*
 * 128-bit carry-out of a + b + carry-in (bit 0 of cl):
 * dl = carry (0 or 1), dh = 0; ch is unused.
 */
static void gen_accc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                          TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
{
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_const_i64(0);

    /* accumulate the carries of the low-half additions into th */
    tcg_gen_andi_i64(tl, cl, 1);
    tcg_gen_add2_i64(tl, th, tl, zero, al, zero);
    tcg_gen_add2_i64(tl, th, tl, th, bl, zero);
    /* propagate through the high half; dl receives the final carry */
    tcg_gen_add2_i64(tl, th, th, zero, ah, zero);
    tcg_gen_add2_i64(tl, dl, tl, th, bh, zero);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(th);
    tcg_temp_free_i64(zero);
}
1500
1501static DisasJumpType op_vaccc(DisasContext *s, DisasOps *o)
1502{
1503    if (get_field(s, m5) != ES_128) {
1504        gen_program_exception(s, PGM_SPECIFICATION);
1505        return DISAS_NORETURN;
1506    }
1507
1508    gen_gvec128_4_i64(gen_accc2_i64, get_field(s, v1),
1509                      get_field(s, v2), get_field(s, v3),
1510                      get_field(s, v4));
1511    return DISAS_NEXT;
1512}
1513
1514static DisasJumpType op_vn(DisasContext *s, DisasOps *o)
1515{
1516    gen_gvec_fn_3(and, ES_8, get_field(s, v1), get_field(s, v2),
1517                  get_field(s, v3));
1518    return DISAS_NEXT;
1519}
1520
1521static DisasJumpType op_vnc(DisasContext *s, DisasOps *o)
1522{
1523    gen_gvec_fn_3(andc, ES_8, get_field(s, v1),
1524                  get_field(s, v2), get_field(s, v3));
1525    return DISAS_NEXT;
1526}
1527
1528static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1529{
1530    TCGv_i64 t0 = tcg_temp_new_i64();
1531    TCGv_i64 t1 = tcg_temp_new_i64();
1532
1533    tcg_gen_ext_i32_i64(t0, a);
1534    tcg_gen_ext_i32_i64(t1, b);
1535    tcg_gen_add_i64(t0, t0, t1);
1536    tcg_gen_addi_i64(t0, t0, 1);
1537    tcg_gen_shri_i64(t0, t0, 1);
1538    tcg_gen_extrl_i64_i32(d, t0);
1539
1540    tcg_temp_free(t0);
1541    tcg_temp_free(t1);
1542}
1543
/* Signed 64-bit average with rounding: (al + bl + 1) >> 1, 65-bit exact. */
static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 ah = tcg_temp_new_i64();
    TCGv_i64 bh = tcg_temp_new_i64();

    /* extending the sign by one bit is sufficient */
    tcg_gen_extract_i64(ah, al, 63, 1);
    tcg_gen_extract_i64(bh, bl, 63, 1);
    tcg_gen_add2_i64(dl, dh, al, ah, bl, bh);
    gen_addi2_i64(dl, dh, dl, dh, 1);
    /* shift the 65-bit sum in dh:dl right by one into dl */
    tcg_gen_extract2_i64(dl, dl, dh, 1);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(bh);
}
1561
1562static DisasJumpType op_vavg(DisasContext *s, DisasOps *o)
1563{
1564    const uint8_t es = get_field(s, m4);
1565    static const GVecGen3 g[4] = {
1566        { .fno = gen_helper_gvec_vavg8, },
1567        { .fno = gen_helper_gvec_vavg16, },
1568        { .fni4 = gen_avg_i32, },
1569        { .fni8 = gen_avg_i64, },
1570    };
1571
1572    if (es > ES_64) {
1573        gen_program_exception(s, PGM_SPECIFICATION);
1574        return DISAS_NORETURN;
1575    }
1576    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1577               get_field(s, v3), &g[es]);
1578    return DISAS_NEXT;
1579}
1580
1581static void gen_avgl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1582{
1583    TCGv_i64 t0 = tcg_temp_new_i64();
1584    TCGv_i64 t1 = tcg_temp_new_i64();
1585
1586    tcg_gen_extu_i32_i64(t0, a);
1587    tcg_gen_extu_i32_i64(t1, b);
1588    tcg_gen_add_i64(t0, t0, t1);
1589    tcg_gen_addi_i64(t0, t0, 1);
1590    tcg_gen_shri_i64(t0, t0, 1);
1591    tcg_gen_extrl_i64_i32(d, t0);
1592
1593    tcg_temp_free(t0);
1594    tcg_temp_free(t1);
1595}
1596
/* Unsigned 64-bit average with rounding: (al + bl + 1) >> 1, 65-bit exact. */
static void gen_avgl_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl)
{
    TCGv_i64 dh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_const_i64(0);

    /* 65-bit sum in dh:dl */
    tcg_gen_add2_i64(dl, dh, al, zero, bl, zero);
    gen_addi2_i64(dl, dh, dl, dh, 1);
    /* shift the 65-bit sum in dh:dl right by one into dl */
    tcg_gen_extract2_i64(dl, dl, dh, 1);

    tcg_temp_free_i64(dh);
    tcg_temp_free_i64(zero);
}
1609
1610static DisasJumpType op_vavgl(DisasContext *s, DisasOps *o)
1611{
1612    const uint8_t es = get_field(s, m4);
1613    static const GVecGen3 g[4] = {
1614        { .fno = gen_helper_gvec_vavgl8, },
1615        { .fno = gen_helper_gvec_vavgl16, },
1616        { .fni4 = gen_avgl_i32, },
1617        { .fni8 = gen_avgl_i64, },
1618    };
1619
1620    if (es > ES_64) {
1621        gen_program_exception(s, PGM_SPECIFICATION);
1622        return DISAS_NORETURN;
1623    }
1624    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1625               get_field(s, v3), &g[es]);
1626    return DISAS_NEXT;
1627}
1628
static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
{
    /*
     * 32-bit end-around-carry sum of the four words of v2, seeded with
     * word 1 of v3. The result is placed into word 1 of v1; all other
     * words of v1 are cleared.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 sum = tcg_temp_new_i32();
    int i;

    read_vec_element_i32(sum, get_field(s, v3), 1, ES_32);
    for (i = 0; i < 4; i++) {
        read_vec_element_i32(tmp, get_field(s, v2), i, ES_32);
        /* sum = sum + tmp + carry(sum + tmp): end-around carry via add2 */
        tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
    }
    gen_gvec_dup_imm(ES_32, get_field(s, v1), 0);
    write_vec_element_i32(sum, get_field(s, v1), 1, ES_32);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(sum);
    return DISAS_NEXT;
}
1647
static DisasJumpType op_vec(DisasContext *s, DisasOps *o)
{
    /*
     * Load one element each of v1 and v2 into o->in1/o->in2 so the
     * common compare/CC code that runs afterwards can consume them.
     */
    uint8_t es = get_field(s, m3);
    /* the last element of the first half of the vector */
    const uint8_t enr = NUM_VEC_ELEMENTS(es) / 2 - 1;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }
    /* op2 0xdb is the signed variant: sign-extend the elements */
    if (s->fields.op2 == 0xdb) {
        es |= MO_SIGN;
    }

    o->in1 = tcg_temp_new_i64();
    o->in2 = tcg_temp_new_i64();
    read_vec_element_i64(o->in1, get_field(s, v1), enr, es);
    read_vec_element_i64(o->in2, get_field(s, v2), enr, es);
    return DISAS_NEXT;
}
1667
static DisasJumpType op_vc(DisasContext *s, DisasOps *o)
{
    /* Element-wise compare; the TCG condition comes from the insn table. */
    const uint8_t es = get_field(s, m4);
    TCGCond cond = s->insn->data;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Each element of v1 becomes all ones (match) or all zeroes. */
    tcg_gen_gvec_cmp(cond, es,
                     vec_full_reg_offset(get_field(s, v1)),
                     vec_full_reg_offset(get_field(s, v2)),
                     vec_full_reg_offset(get_field(s, v3)), 16, 16);
    /* With m5 bit 0 set, also derive the CC from the result vector. */
    if (get_field(s, m5) & 0x1) {
        TCGv_i64 low = tcg_temp_new_i64();
        TCGv_i64 high = tcg_temp_new_i64();

        read_vec_element_i64(high, get_field(s, v1), 0, ES_64);
        read_vec_element_i64(low, get_field(s, v1), 1, ES_64);
        gen_op_update2_cc_i64(s, CC_OP_VC, low, high);

        tcg_temp_free_i64(low);
        tcg_temp_free_i64(high);
    }
    return DISAS_NEXT;
}
1695
/* Count leading zeros; yields 32 when a == 0. */
static void gen_clz_i32(TCGv_i32 d, TCGv_i32 a)
{
    tcg_gen_clzi_i32(d, a, 32);
}
1700
/* Count leading zeros; yields 64 when a == 0. */
static void gen_clz_i64(TCGv_i64 d, TCGv_i64 a)
{
    tcg_gen_clzi_i64(d, a, 64);
}
1705
1706static DisasJumpType op_vclz(DisasContext *s, DisasOps *o)
1707{
1708    const uint8_t es = get_field(s, m3);
1709    static const GVecGen2 g[4] = {
1710        { .fno = gen_helper_gvec_vclz8, },
1711        { .fno = gen_helper_gvec_vclz16, },
1712        { .fni4 = gen_clz_i32, },
1713        { .fni8 = gen_clz_i64, },
1714    };
1715
1716    if (es > ES_64) {
1717        gen_program_exception(s, PGM_SPECIFICATION);
1718        return DISAS_NORETURN;
1719    }
1720    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
1721    return DISAS_NEXT;
1722}
1723
/* Count trailing zeros; yields 32 when a == 0. */
static void gen_ctz_i32(TCGv_i32 d, TCGv_i32 a)
{
    tcg_gen_ctzi_i32(d, a, 32);
}
1728
/* Count trailing zeros; yields 64 when a == 0. */
static void gen_ctz_i64(TCGv_i64 d, TCGv_i64 a)
{
    tcg_gen_ctzi_i64(d, a, 64);
}
1733
1734static DisasJumpType op_vctz(DisasContext *s, DisasOps *o)
1735{
1736    const uint8_t es = get_field(s, m3);
1737    static const GVecGen2 g[4] = {
1738        { .fno = gen_helper_gvec_vctz8, },
1739        { .fno = gen_helper_gvec_vctz16, },
1740        { .fni4 = gen_ctz_i32, },
1741        { .fni8 = gen_ctz_i64, },
1742    };
1743
1744    if (es > ES_64) {
1745        gen_program_exception(s, PGM_SPECIFICATION);
1746        return DISAS_NORETURN;
1747    }
1748    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
1749    return DISAS_NEXT;
1750}
1751
1752static DisasJumpType op_vx(DisasContext *s, DisasOps *o)
1753{
1754    gen_gvec_fn_3(xor, ES_8, get_field(s, v1), get_field(s, v2),
1755                 get_field(s, v3));
1756    return DISAS_NEXT;
1757}
1758
1759static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o)
1760{
1761    const uint8_t es = get_field(s, m4);
1762    static const GVecGen3 g[4] = {
1763        { .fno = gen_helper_gvec_vgfm8, },
1764        { .fno = gen_helper_gvec_vgfm16, },
1765        { .fno = gen_helper_gvec_vgfm32, },
1766        { .fno = gen_helper_gvec_vgfm64, },
1767    };
1768
1769    if (es > ES_64) {
1770        gen_program_exception(s, PGM_SPECIFICATION);
1771        return DISAS_NORETURN;
1772    }
1773    gen_gvec_3(get_field(s, v1), get_field(s, v2),
1774               get_field(s, v3), &g[es]);
1775    return DISAS_NEXT;
1776}
1777
1778static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o)
1779{
1780    const uint8_t es = get_field(s, m5);
1781    static const GVecGen4 g[4] = {
1782        { .fno = gen_helper_gvec_vgfma8, },
1783        { .fno = gen_helper_gvec_vgfma16, },
1784        { .fno = gen_helper_gvec_vgfma32, },
1785        { .fno = gen_helper_gvec_vgfma64, },
1786    };
1787
1788    if (es > ES_64) {
1789        gen_program_exception(s, PGM_SPECIFICATION);
1790        return DISAS_NORETURN;
1791    }
1792    gen_gvec_4(get_field(s, v1), get_field(s, v2),
1793               get_field(s, v3), get_field(s, v4), &g[es]);
1794    return DISAS_NEXT;
1795}
1796
1797static DisasJumpType op_vlc(DisasContext *s, DisasOps *o)
1798{
1799    const uint8_t es = get_field(s, m3);
1800
1801    if (es > ES_64) {
1802        gen_program_exception(s, PGM_SPECIFICATION);
1803        return DISAS_NORETURN;
1804    }
1805
1806    gen_gvec_fn_2(neg, es, get_field(s, v1), get_field(s, v2));
1807    return DISAS_NEXT;
1808}
1809
1810static DisasJumpType op_vlp(DisasContext *s, DisasOps *o)
1811{
1812    const uint8_t es = get_field(s, m3);
1813
1814    if (es > ES_64) {
1815        gen_program_exception(s, PGM_SPECIFICATION);
1816        return DISAS_NORETURN;
1817    }
1818
1819    gen_gvec_fn_2(abs, es, get_field(s, v1), get_field(s, v2));
1820    return DISAS_NEXT;
1821}
1822
1823static DisasJumpType op_vmx(DisasContext *s, DisasOps *o)
1824{
1825    const uint8_t v1 = get_field(s, v1);
1826    const uint8_t v2 = get_field(s, v2);
1827    const uint8_t v3 = get_field(s, v3);
1828    const uint8_t es = get_field(s, m4);
1829
1830    if (es > ES_64) {
1831        gen_program_exception(s, PGM_SPECIFICATION);
1832        return DISAS_NORETURN;
1833    }
1834
1835    switch (s->fields.op2) {
1836    case 0xff:
1837        gen_gvec_fn_3(smax, es, v1, v2, v3);
1838        break;
1839    case 0xfd:
1840        gen_gvec_fn_3(umax, es, v1, v2, v3);
1841        break;
1842    case 0xfe:
1843        gen_gvec_fn_3(smin, es, v1, v2, v3);
1844        break;
1845    case 0xfc:
1846        gen_gvec_fn_3(umin, es, v1, v2, v3);
1847        break;
1848    default:
1849        g_assert_not_reached();
1850    }
1851    return DISAS_NEXT;
1852}
1853
1854static void gen_mal_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1855{
1856    TCGv_i32 t0 = tcg_temp_new_i32();
1857
1858    tcg_gen_mul_i32(t0, a, b);
1859    tcg_gen_add_i32(d, t0, c);
1860
1861    tcg_temp_free_i32(t0);
1862}
1863
1864static void gen_mah_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1865{
1866    TCGv_i64 t0 = tcg_temp_new_i64();
1867    TCGv_i64 t1 = tcg_temp_new_i64();
1868    TCGv_i64 t2 = tcg_temp_new_i64();
1869
1870    tcg_gen_ext_i32_i64(t0, a);
1871    tcg_gen_ext_i32_i64(t1, b);
1872    tcg_gen_ext_i32_i64(t2, c);
1873    tcg_gen_mul_i64(t0, t0, t1);
1874    tcg_gen_add_i64(t0, t0, t2);
1875    tcg_gen_extrh_i64_i32(d, t0);
1876
1877    tcg_temp_free(t0);
1878    tcg_temp_free(t1);
1879    tcg_temp_free(t2);
1880}
1881
1882static void gen_malh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c)
1883{
1884    TCGv_i64 t0 = tcg_temp_new_i64();
1885    TCGv_i64 t1 = tcg_temp_new_i64();
1886    TCGv_i64 t2 = tcg_temp_new_i64();
1887
1888    tcg_gen_extu_i32_i64(t0, a);
1889    tcg_gen_extu_i32_i64(t1, b);
1890    tcg_gen_extu_i32_i64(t2, c);
1891    tcg_gen_mul_i64(t0, t0, t1);
1892    tcg_gen_add_i64(t0, t0, t2);
1893    tcg_gen_extrh_i64_i32(d, t0);
1894
1895    tcg_temp_free(t0);
1896    tcg_temp_free(t1);
1897    tcg_temp_free(t2);
1898}
1899
static DisasJumpType op_vma(DisasContext *s, DisasOps *o)
{
    /*
     * Vector multiply-and-add family: the opcode selects one of seven
     * variants (see the helper names below); the element size is at
     * most 32 bit for all of them.
     */
    const uint8_t es = get_field(s, m5);
    static const GVecGen4 g_vmal[3] = {
        { .fno = gen_helper_gvec_vmal8, },
        { .fno = gen_helper_gvec_vmal16, },
        { .fni4 = gen_mal_i32, },
    };
    static const GVecGen4 g_vmah[3] = {
        { .fno = gen_helper_gvec_vmah8, },
        { .fno = gen_helper_gvec_vmah16, },
        { .fni4 = gen_mah_i32, },
    };
    static const GVecGen4 g_vmalh[3] = {
        { .fno = gen_helper_gvec_vmalh8, },
        { .fno = gen_helper_gvec_vmalh16, },
        { .fni4 = gen_malh_i32, },
    };
    static const GVecGen4 g_vmae[3] = {
        { .fno = gen_helper_gvec_vmae8, },
        { .fno = gen_helper_gvec_vmae16, },
        { .fno = gen_helper_gvec_vmae32, },
    };
    static const GVecGen4 g_vmale[3] = {
        { .fno = gen_helper_gvec_vmale8, },
        { .fno = gen_helper_gvec_vmale16, },
        { .fno = gen_helper_gvec_vmale32, },
    };
    static const GVecGen4 g_vmao[3] = {
        { .fno = gen_helper_gvec_vmao8, },
        { .fno = gen_helper_gvec_vmao16, },
        { .fno = gen_helper_gvec_vmao32, },
    };
    static const GVecGen4 g_vmalo[3] = {
        { .fno = gen_helper_gvec_vmalo8, },
        { .fno = gen_helper_gvec_vmalo16, },
        { .fno = gen_helper_gvec_vmalo32, },
    };
    const GVecGen4 *fn;

    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Dispatch on the opcode to the matching expansion table. */
    switch (s->fields.op2) {
    case 0xaa:
        fn = &g_vmal[es];
        break;
    case 0xab:
        fn = &g_vmah[es];
        break;
    case 0xa9:
        fn = &g_vmalh[es];
        break;
    case 0xae:
        fn = &g_vmae[es];
        break;
    case 0xac:
        fn = &g_vmale[es];
        break;
    case 0xaf:
        fn = &g_vmao[es];
        break;
    case 0xad:
        fn = &g_vmalo[es];
        break;
    default:
        g_assert_not_reached();
    }

    gen_gvec_4(get_field(s, v1), get_field(s, v2),
               get_field(s, v3), get_field(s, v4), fn);
    return DISAS_NEXT;
}
1975
1976static void gen_mh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1977{
1978    TCGv_i32 t = tcg_temp_new_i32();
1979
1980    tcg_gen_muls2_i32(t, d, a, b);
1981    tcg_temp_free_i32(t);
1982}
1983
1984static void gen_mlh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1985{
1986    TCGv_i32 t = tcg_temp_new_i32();
1987
1988    tcg_gen_mulu2_i32(t, d, a, b);
1989    tcg_temp_free_i32(t);
1990}
1991
/*
 * VECTOR MULTIPLY (low/high/even/odd, arithmetic and logical): dispatch
 * on the opcode.  The plain multiply uses the generic gvec "mul"; the
 * 32-bit high variants expand inline, everything else uses ool helpers.
 */
static DisasJumpType op_vm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    static const GVecGen3 g_vmh[3] = {
        { .fno = gen_helper_gvec_vmh8, },
        { .fno = gen_helper_gvec_vmh16, },
        { .fni4 = gen_mh_i32, },
    };
    static const GVecGen3 g_vmlh[3] = {
        { .fno = gen_helper_gvec_vmlh8, },
        { .fno = gen_helper_gvec_vmlh16, },
        { .fni4 = gen_mlh_i32, },
    };
    static const GVecGen3 g_vme[3] = {
        { .fno = gen_helper_gvec_vme8, },
        { .fno = gen_helper_gvec_vme16, },
        { .fno = gen_helper_gvec_vme32, },
    };
    static const GVecGen3 g_vmle[3] = {
        { .fno = gen_helper_gvec_vmle8, },
        { .fno = gen_helper_gvec_vmle16, },
        { .fno = gen_helper_gvec_vmle32, },
    };
    static const GVecGen3 g_vmo[3] = {
        { .fno = gen_helper_gvec_vmo8, },
        { .fno = gen_helper_gvec_vmo16, },
        { .fno = gen_helper_gvec_vmo32, },
    };
    static const GVecGen3 g_vmlo[3] = {
        { .fno = gen_helper_gvec_vmlo8, },
        { .fno = gen_helper_gvec_vmlo16, },
        { .fno = gen_helper_gvec_vmlo32, },
    };
    const GVecGen3 *fn;

    /* Only 8/16/32-bit element sizes are defined for these opcodes. */
    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields.op2) {
    case 0xa2: /* VML: element-wise multiply, low half of the product */
        gen_gvec_fn_3(mul, es, get_field(s, v1),
                      get_field(s, v2), get_field(s, v3));
        return DISAS_NEXT;
    case 0xa3: /* VMH */
        fn = &g_vmh[es];
        break;
    case 0xa1: /* VMLH */
        fn = &g_vmlh[es];
        break;
    case 0xa6: /* VME */
        fn = &g_vme[es];
        break;
    case 0xa4: /* VMLE */
        fn = &g_vmle[es];
        break;
    case 0xa7: /* VMO */
        fn = &g_vmo[es];
        break;
    case 0xa5: /* VMLO */
        fn = &g_vmlo[es];
        break;
    default:
        g_assert_not_reached();
    }

    gen_gvec_3(get_field(s, v1), get_field(s, v2),
               get_field(s, v3), fn);
    return DISAS_NEXT;
}
2063
/*
 * VECTOR MULTIPLY SUM LOGICAL: only the 64-bit element size is defined.
 * The two unsigned 64x64->128 products (each optionally doubled under
 * control of an m6 bit) are summed together with the 128-bit third
 * operand v4 into v1.
 */
static DisasJumpType op_vmsl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 l1, h1, l2, h2;

    if (get_field(s, m5) != ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    l1 = tcg_temp_new_i64();
    h1 = tcg_temp_new_i64();
    l2 = tcg_temp_new_i64();
    h2 = tcg_temp_new_i64();

    /* Multiply both even elements from v2 and v3 */
    read_vec_element_i64(l1, get_field(s, v2), 0, ES_64);
    read_vec_element_i64(h1, get_field(s, v3), 0, ES_64);
    tcg_gen_mulu2_i64(l1, h1, l1, h1);
    /* Shift result left by one (x2) if requested */
    if (extract32(get_field(s, m6), 3, 1)) {
        tcg_gen_add2_i64(l1, h1, l1, h1, l1, h1);
    }

    /* Multiply both odd elements from v2 and v3 */
    read_vec_element_i64(l2, get_field(s, v2), 1, ES_64);
    read_vec_element_i64(h2, get_field(s, v3), 1, ES_64);
    tcg_gen_mulu2_i64(l2, h2, l2, h2);
    /* Shift result left by one (x2) if requested */
    if (extract32(get_field(s, m6), 2, 1)) {
        tcg_gen_add2_i64(l2, h2, l2, h2, l2, h2);
    }

    /* Add both intermediate results */
    tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);
    /* Add whole v4 */
    read_vec_element_i64(h2, get_field(s, v4), 0, ES_64);
    read_vec_element_i64(l2, get_field(s, v4), 1, ES_64);
    tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2);

    /* Store final result into v1. */
    write_vec_element_i64(h1, get_field(s, v1), 0, ES_64);
    write_vec_element_i64(l1, get_field(s, v1), 1, ES_64);

    tcg_temp_free_i64(l1);
    tcg_temp_free_i64(h1);
    tcg_temp_free_i64(l2);
    tcg_temp_free_i64(h2);
    return DISAS_NEXT;
}
2113
2114static DisasJumpType op_vnn(DisasContext *s, DisasOps *o)
2115{
2116    gen_gvec_fn_3(nand, ES_8, get_field(s, v1),
2117                  get_field(s, v2), get_field(s, v3));
2118    return DISAS_NEXT;
2119}
2120
2121static DisasJumpType op_vno(DisasContext *s, DisasOps *o)
2122{
2123    gen_gvec_fn_3(nor, ES_8, get_field(s, v1), get_field(s, v2),
2124                  get_field(s, v3));
2125    return DISAS_NEXT;
2126}
2127
2128static DisasJumpType op_vnx(DisasContext *s, DisasOps *o)
2129{
2130    gen_gvec_fn_3(eqv, ES_8, get_field(s, v1), get_field(s, v2),
2131                  get_field(s, v3));
2132    return DISAS_NEXT;
2133}
2134
2135static DisasJumpType op_vo(DisasContext *s, DisasOps *o)
2136{
2137    gen_gvec_fn_3(or, ES_8, get_field(s, v1), get_field(s, v2),
2138                  get_field(s, v3));
2139    return DISAS_NEXT;
2140}
2141
2142static DisasJumpType op_voc(DisasContext *s, DisasOps *o)
2143{
2144    gen_gvec_fn_3(orc, ES_8, get_field(s, v1), get_field(s, v2),
2145                  get_field(s, v3));
2146    return DISAS_NEXT;
2147}
2148
/*
 * VECTOR POPULATION COUNT: count the one bits per element.  Only byte
 * elements are available without the vector-enhancements facility.
 */
static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m3);
    static const GVecGen2 g[4] = {
        { .fno = gen_helper_gvec_vpopct8, },
        { .fno = gen_helper_gvec_vpopct16, },
        /* 32/64-bit elements map directly onto the TCG ctpop ops. */
        { .fni4 = tcg_gen_ctpop_i32, },
        { .fni8 = tcg_gen_ctpop_i64, },
    };

    if (es > ES_64 || (es != ES_8 && !s390_has_feat(S390_FEAT_VECTOR_ENH))) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]);
    return DISAS_NEXT;
}
2167
2168static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
2169{
2170    TCGv_i32 t = tcg_temp_new_i32();
2171
2172    tcg_gen_rotli_i32(t, a, c & 31);
2173    tcg_gen_and_i32(t, t, b);
2174    tcg_gen_andc_i32(d, d, b);
2175    tcg_gen_or_i32(d, d, t);
2176
2177    tcg_temp_free_i32(t);
2178}
2179
2180static void gen_rim_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, int64_t c)
2181{
2182    TCGv_i64 t = tcg_temp_new_i64();
2183
2184    tcg_gen_rotli_i64(t, a, c & 63);
2185    tcg_gen_and_i64(t, t, b);
2186    tcg_gen_andc_i64(d, d, b);
2187    tcg_gen_or_i64(d, d, t);
2188
2189    tcg_temp_free_i64(t);
2190}
2191
/*
 * VECTOR ELEMENT ROTATE AND INSERT UNDER MASK: rotate each v2 element
 * left by i4 (taken modulo the element width) and insert the rotated
 * bits selected by the v3 mask into v1.  v1 is read-modify-write, hence
 * load_dest for the inline expanders.
 */
static DisasJumpType op_verim(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m5);
    const uint8_t i4 = get_field(s, i4) &
                       (NUM_VEC_ELEMENT_BITS(es) - 1);
    static const GVecGen3i g[4] = {
        { .fno = gen_helper_gvec_verim8, },
        { .fno = gen_helper_gvec_verim16, },
        { .fni4 = gen_rim_i32,
          .load_dest = true, },
        { .fni8 = gen_rim_i64,
          .load_dest = true, },
    };

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_3i(get_field(s, v1), get_field(s, v2),
                get_field(s, v3), i4, &g[es]);
    return DISAS_NEXT;
}
2215
2216static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
2217{
2218    const uint8_t es = get_field(s, m4);
2219    const uint8_t v1 = get_field(s, v1);
2220    const uint8_t v2 = get_field(s, v2);
2221    const uint8_t v3 = get_field(s, v3);
2222
2223    if (es > ES_64) {
2224        gen_program_exception(s, PGM_SPECIFICATION);
2225        return DISAS_NORETURN;
2226    }
2227
2228    switch (s->fields.op2) {
2229    case 0x70:
2230        gen_gvec_fn_3(shlv, es, v1, v2, v3);
2231        break;
2232    case 0x73:
2233        gen_gvec_fn_3(rotlv, es, v1, v2, v3);
2234        break;
2235    case 0x7a:
2236        gen_gvec_fn_3(sarv, es, v1, v2, v3);
2237        break;
2238    case 0x78:
2239        gen_gvec_fn_3(shrv, es, v1, v2, v3);
2240        break;
2241    default:
2242        g_assert_not_reached();
2243    }
2244    return DISAS_NEXT;
2245}
2246
/*
 * Vector element shift/rotate by a single scalar amount.  With b2 == 0
 * the amount is the immediate d2; otherwise it is taken from the
 * computed address o->addr1.  Either way it is masked to the element
 * width, so no out-of-range shift can reach the gvec expanders.
 */
static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    const uint8_t d2 = get_field(s, d2) &
                       (NUM_VEC_ELEMENT_BITS(es) - 1);
    const uint8_t v1 = get_field(s, v1);
    const uint8_t v3 = get_field(s, v3);
    TCGv_i32 shift;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* b2 == 0: the shift amount is fully known at translation time. */
    if (likely(!get_field(s, b2))) {
        switch (s->fields.op2) {
        case 0x30: /* shift left */
            gen_gvec_fn_2i(shli, es, v1, v3, d2);
            break;
        case 0x33: /* rotate left */
            gen_gvec_fn_2i(rotli, es, v1, v3, d2);
            break;
        case 0x3a: /* shift right arithmetic */
            gen_gvec_fn_2i(sari, es, v1, v3, d2);
            break;
        case 0x38: /* shift right logical */
            gen_gvec_fn_2i(shri, es, v1, v3, d2);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        shift = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(shift, o->addr1);
        tcg_gen_andi_i32(shift, shift, NUM_VEC_ELEMENT_BITS(es) - 1);
        switch (s->fields.op2) {
        case 0x30: /* shift left */
            gen_gvec_fn_2s(shls, es, v1, v3, shift);
            break;
        case 0x33: /* rotate left */
            gen_gvec_fn_2s(rotls, es, v1, v3, shift);
            break;
        case 0x3a: /* shift right arithmetic */
            gen_gvec_fn_2s(sars, es, v1, v3, shift);
            break;
        case 0x38: /* shift right logical */
            gen_gvec_fn_2s(shrs, es, v1, v3, shift);
            break;
        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i32(shift);
    }
    return DISAS_NEXT;
}
2302
/*
 * Common expansion for the whole-vector shifts and their *B (by byte)
 * variants; s->insn->data distinguishes the byte variants.  With the
 * vector-enhancements-2 facility the bit variants use a dedicated _ve2
 * ool helper taking the whole v3 operand (see the helper for the
 * refined VE2 semantics); otherwise the shift count comes from byte
 * element 7 of v3, masked to a multiple of 8 bits (byte variant,
 * 0x78) or to 0-7 bits (bit variant).
 */
static DisasJumpType gen_vsh_by_byte(DisasContext *s, DisasOps *o,
                                      gen_helper_gvec_2i *gen,
                                      gen_helper_gvec_3 *gen_ve2)
{
    bool byte = s->insn->data;

    if (!byte && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), 0, gen_ve2);
    } else {
        TCGv_i64 shift = tcg_temp_new_i64();

        read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
        tcg_gen_andi_i64(shift, shift, byte ? 0x78 : 7);
        gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), shift, 0, gen);
        tcg_temp_free_i64(shift);
    }
    return DISAS_NEXT;
}
2322
/* VECTOR SHIFT LEFT (BY BYTE): expansion shared via gen_vsh_by_byte(). */
static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
{
    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsl,
                            gen_helper_gvec_vsl_ve2);
}
2328
/* VECTOR SHIFT RIGHT ARITHMETIC (BY BYTE): shared gen_vsh_by_byte(). */
static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
{
    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsra,
                            gen_helper_gvec_vsra_ve2);
}
2334
/* VECTOR SHIFT RIGHT LOGICAL (BY BYTE): shared gen_vsh_by_byte(). */
static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
{
    return gen_vsh_by_byte(s, o, gen_helper_gvec_vsrl,
                            gen_helper_gvec_vsrl_ve2);
}
2340
2341static DisasJumpType op_vsld(DisasContext *s, DisasOps *o)
2342{
2343    const bool byte = s->insn->data;
2344    const uint8_t mask = byte ? 15 : 7;
2345    const uint8_t mul  = byte ?  8 : 1;
2346    const uint8_t i4   = get_field(s, i4);
2347    const int right_shift = 64 - (i4 & 7) * mul;
2348    TCGv_i64 t0, t1, t2;
2349
2350    if (i4 & ~mask) {
2351        gen_program_exception(s, PGM_SPECIFICATION);
2352        return DISAS_NORETURN;
2353    }
2354
2355    t0 = tcg_temp_new_i64();
2356    t1 = tcg_temp_new_i64();
2357    t2 = tcg_temp_new_i64();
2358
2359    if ((i4 & 8) == 0) {
2360        read_vec_element_i64(t0, get_field(s, v2), 0, ES_64);
2361        read_vec_element_i64(t1, get_field(s, v2), 1, ES_64);
2362        read_vec_element_i64(t2, get_field(s, v3), 0, ES_64);
2363    } else {
2364        read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
2365        read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
2366        read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
2367    }
2368
2369    tcg_gen_extract2_i64(t0, t1, t0, right_shift);
2370    tcg_gen_extract2_i64(t1, t2, t1, right_shift);
2371
2372    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
2373    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
2374
2375    tcg_temp_free(t0);
2376    tcg_temp_free(t1);
2377    tcg_temp_free(t2);
2378    return DISAS_NEXT;
2379}
2380
2381static DisasJumpType op_vsrd(DisasContext *s, DisasOps *o)
2382{
2383    const uint8_t i4 = get_field(s, i4);
2384    TCGv_i64 t0, t1, t2;
2385
2386    if (i4 & ~7) {
2387        gen_program_exception(s, PGM_SPECIFICATION);
2388        return DISAS_NORETURN;
2389    }
2390
2391    t0 = tcg_temp_new_i64();
2392    t1 = tcg_temp_new_i64();
2393    t2 = tcg_temp_new_i64();
2394
2395    read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
2396    read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
2397    read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
2398
2399    tcg_gen_extract2_i64(t0, t1, t0, i4);
2400    tcg_gen_extract2_i64(t1, t2, t1, i4);
2401
2402    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
2403    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
2404
2405    tcg_temp_free(t0);
2406    tcg_temp_free(t1);
2407    tcg_temp_free(t2);
2408    return DISAS_NEXT;
2409}
2410
2411static DisasJumpType op_vs(DisasContext *s, DisasOps *o)
2412{
2413    const uint8_t es = get_field(s, m4);
2414
2415    if (es > ES_128) {
2416        gen_program_exception(s, PGM_SPECIFICATION);
2417        return DISAS_NORETURN;
2418    } else if (es == ES_128) {
2419        gen_gvec128_3_i64(tcg_gen_sub2_i64, get_field(s, v1),
2420                          get_field(s, v2), get_field(s, v3));
2421        return DISAS_NEXT;
2422    }
2423    gen_gvec_fn_3(sub, es, get_field(s, v1), get_field(s, v2),
2424                  get_field(s, v3));
2425    return DISAS_NEXT;
2426}
2427
/* Borrow indication for a - b: d = 1 (no borrow) iff a >= b unsigned. */
static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_setcond_i32(TCG_COND_GEU, d, a, b);
}
2432
/* Borrow indication for a - b: d = 1 (no borrow) iff a >= b unsigned. */
static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_setcond_i64(TCG_COND_GEU, d, a, b);
}
2437
/*
 * 128-bit borrow indication for {ah,al} - {bh,bl}: dl receives 1 if the
 * subtraction produces no borrow, 0 otherwise; dh is always 0.
 */
static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al,
                          TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{
    TCGv_i64 th = tcg_temp_new_i64();
    TCGv_i64 tl = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_const_i64(0);

    /* Low-half borrow: th becomes 0/-1, reduce it to 0/1. */
    tcg_gen_sub2_i64(tl, th, al, zero, bl, zero);
    tcg_gen_andi_i64(th, th, 1);
    /* Propagate the borrow through the high half; th ends up 0 or -1. */
    tcg_gen_sub2_i64(tl, th, ah, zero, th, zero);
    tcg_gen_sub2_i64(tl, th, tl, th, bh, zero);
    /* "invert" the result: -1 -> 0; 0 -> 1 */
    tcg_gen_addi_i64(dl, th, 1);
    tcg_gen_mov_i64(dh, zero);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(zero);
}
2457
/*
 * VECTOR SUBTRACT COMPUTE BORROW INDICATION: per element, store 1 if
 * v2 - v3 produces no borrow, 0 otherwise.  32/64-bit elements expand
 * inline via setcond; 128-bit uses the manual borrow computation.
 */
static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    static const GVecGen3 g[4] = {
        { .fno = gen_helper_gvec_vscbi8, },
        { .fno = gen_helper_gvec_vscbi16, },
        { .fni4 = gen_scbi_i32, },
        { .fni8 = gen_scbi_i64, },
    };

    if (es > ES_128) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    } else if (es == ES_128) {
        gen_gvec128_3_i64(gen_scbi2_i64, get_field(s, v1),
                          get_field(s, v2), get_field(s, v3));
        return DISAS_NEXT;
    }
    gen_gvec_3(get_field(s, v1), get_field(s, v2),
               get_field(s, v3), &g[es]);
    return DISAS_NEXT;
}
2480
2481static void gen_sbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2482                         TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2483{
2484    TCGv_i64 tl = tcg_temp_new_i64();
2485    TCGv_i64 th = tcg_temp_new_i64();
2486
2487    tcg_gen_not_i64(tl, bl);
2488    tcg_gen_not_i64(th, bh);
2489    gen_ac2_i64(dl, dh, al, ah, tl, th, cl, ch);
2490    tcg_temp_free_i64(tl);
2491    tcg_temp_free_i64(th);
2492}
2493
2494static DisasJumpType op_vsbi(DisasContext *s, DisasOps *o)
2495{
2496    if (get_field(s, m5) != ES_128) {
2497        gen_program_exception(s, PGM_SPECIFICATION);
2498        return DISAS_NORETURN;
2499    }
2500
2501    gen_gvec128_4_i64(gen_sbi2_i64, get_field(s, v1),
2502                      get_field(s, v2), get_field(s, v3),
2503                      get_field(s, v4));
2504    return DISAS_NEXT;
2505}
2506
2507static void gen_sbcbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
2508                           TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch)
2509{
2510    TCGv_i64 th = tcg_temp_new_i64();
2511    TCGv_i64 tl = tcg_temp_new_i64();
2512
2513    tcg_gen_not_i64(tl, bl);
2514    tcg_gen_not_i64(th, bh);
2515    gen_accc2_i64(dl, dh, al, ah, tl, th, cl, ch);
2516
2517    tcg_temp_free_i64(tl);
2518    tcg_temp_free_i64(th);
2519}
2520
2521static DisasJumpType op_vsbcbi(DisasContext *s, DisasOps *o)
2522{
2523    if (get_field(s, m5) != ES_128) {
2524        gen_program_exception(s, PGM_SPECIFICATION);
2525        return DISAS_NORETURN;
2526    }
2527
2528    gen_gvec128_4_i64(gen_sbcbi2_i64, get_field(s, v1),
2529                      get_field(s, v2), get_field(s, v3),
2530                      get_field(s, v4));
2531    return DISAS_NEXT;
2532}
2533
/*
 * VECTOR SUM ACROSS DOUBLEWORD: each of the two 64-bit v1 elements
 * receives the sum of one half of the v2 elements plus the
 * highest-indexed v3 element of the same half.  16/32-bit elements only.
 */
static DisasJumpType op_vsumg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    TCGv_i64 sum, tmp;
    uint8_t dst_idx;

    if (es == ES_8 || es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sum = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();
    for (dst_idx = 0; dst_idx < 2; dst_idx++) {
        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 2;
        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 2 - 1;

        /* Seed with the last v3 element of this half ... */
        read_vec_element_i64(sum, get_field(s, v3), max_idx, es);
        /* ... then accumulate all v2 elements of the same half. */
        for (; idx <= max_idx; idx++) {
            read_vec_element_i64(tmp, get_field(s, v2), idx, es);
            tcg_gen_add_i64(sum, sum, tmp);
        }
        write_vec_element_i64(sum, get_field(s, v1), dst_idx, ES_64);
    }
    tcg_temp_free_i64(sum);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
2562
/*
 * VECTOR SUM ACROSS QUADWORD: sum all v2 elements plus the last v3
 * element into the single 128-bit element of v1, accumulating with
 * 128-bit (add2) arithmetic.  32/64-bit elements only.
 */
static DisasJumpType op_vsumq(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    const uint8_t max_idx = NUM_VEC_ELEMENTS(es) - 1;
    TCGv_i64 sumh, suml, zero, tmpl;
    uint8_t idx;

    if (es < ES_32 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sumh = tcg_const_i64(0);
    suml = tcg_temp_new_i64();
    zero = tcg_const_i64(0);
    tmpl = tcg_temp_new_i64();

    /* Seed the low half with the last v3 element. */
    read_vec_element_i64(suml, get_field(s, v3), max_idx, es);
    for (idx = 0; idx <= max_idx; idx++) {
        read_vec_element_i64(tmpl, get_field(s, v2), idx, es);
        /* Propagate carries from the low into the high 64 bits. */
        tcg_gen_add2_i64(suml, sumh, suml, sumh, tmpl, zero);
    }
    write_vec_element_i64(sumh, get_field(s, v1), 0, ES_64);
    write_vec_element_i64(suml, get_field(s, v1), 1, ES_64);

    tcg_temp_free_i64(sumh);
    tcg_temp_free_i64(suml);
    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(tmpl);
    return DISAS_NEXT;
}
2594
/*
 * VECTOR SUM ACROSS WORD: each of the four 32-bit v1 elements receives
 * the sum of one quarter of the v2 elements plus the highest-indexed
 * v3 element of the same quarter.  8/16-bit elements only.
 */
static DisasJumpType op_vsum(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    TCGv_i32 sum, tmp;
    uint8_t dst_idx;

    if (es > ES_16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    sum = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    for (dst_idx = 0; dst_idx < 4; dst_idx++) {
        uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 4;
        const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 4 - 1;

        /* Seed with the last v3 element of this quarter ... */
        read_vec_element_i32(sum, get_field(s, v3), max_idx, es);
        /* ... then accumulate all v2 elements of the same quarter. */
        for (; idx <= max_idx; idx++) {
            read_vec_element_i32(tmp, get_field(s, v2), idx, es);
            tcg_gen_add_i32(sum, sum, tmp);
        }
        write_vec_element_i32(sum, get_field(s, v1), dst_idx, ES_32);
    }
    tcg_temp_free_i32(sum);
    tcg_temp_free_i32(tmp);
    return DISAS_NEXT;
}
2623
/* VECTOR TEST UNDER MASK: the helper sets the CC via env->cc_op. */
static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
{
    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
                   cpu_env, 0, gen_helper_gvec_vtm);
    set_cc_static(s);
    return DISAS_NEXT;
}
2631
/*
 * VECTOR FIND ANY ELEMENT EQUAL: m5 bit 0 requests CC computation, in
 * which case the _cc helpers set env->cc_op; the remaining m5 flags are
 * passed through to the helpers.  8/16/32-bit elements only.
 */
static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_3 * const g[3] = {
        gen_helper_gvec_vfae8,
        gen_helper_gvec_vfae16,
        gen_helper_gvec_vfae32,
    };
    static gen_helper_gvec_3_ptr * const g_cc[3] = {
        gen_helper_gvec_vfae_cc8,
        gen_helper_gvec_vfae_cc16,
        gen_helper_gvec_vfae_cc32,
    };
    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), cpu_env, m5, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), m5, g[es]);
    }
    return DISAS_NEXT;
}
2661
/*
 * VECTOR FIND ELEMENT EQUAL: m5 bit 0 requests CC computation (via the
 * _cc helpers and env->cc_op); all other m5 bits except bit 1 are
 * reserved.  8/16/32-bit elements only.
 */
static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_3 * const g[3] = {
        gen_helper_gvec_vfee8,
        gen_helper_gvec_vfee16,
        gen_helper_gvec_vfee32,
    };
    static gen_helper_gvec_3_ptr * const g_cc[3] = {
        gen_helper_gvec_vfee_cc8,
        gen_helper_gvec_vfee_cc16,
        gen_helper_gvec_vfee_cc32,
    };

    if (es > ES_32 || m5 & ~0x3) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), cpu_env, m5, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), m5, g[es]);
    }
    return DISAS_NEXT;
}
2692
/*
 * VECTOR FIND ELEMENT NOT EQUAL: m5 bit 0 requests CC computation (via
 * the _cc helpers and env->cc_op); all other m5 bits except bit 1 are
 * reserved.  8/16/32-bit elements only.
 */
static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_3 * const g[3] = {
        gen_helper_gvec_vfene8,
        gen_helper_gvec_vfene16,
        gen_helper_gvec_vfene32,
    };
    static gen_helper_gvec_3_ptr * const g_cc[3] = {
        gen_helper_gvec_vfene_cc8,
        gen_helper_gvec_vfene_cc16,
        gen_helper_gvec_vfene_cc32,
    };

    if (es > ES_32 || m5 & ~0x3) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), cpu_env, m5, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
                       get_field(s, v3), m5, g[es]);
    }
    return DISAS_NEXT;
}
2723
/*
 * VECTOR ISOLATE STRING: m5 bit 0 requests CC computation (via the _cc
 * helpers and env->cc_op); all other m5 bits are reserved.  8/16/32-bit
 * elements only.
 */
static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m3);
    const uint8_t m5 = get_field(s, m5);
    static gen_helper_gvec_2 * const g[3] = {
        gen_helper_gvec_vistr8,
        gen_helper_gvec_vistr16,
        gen_helper_gvec_vistr32,
    };
    static gen_helper_gvec_2_ptr * const g_cc[3] = {
        gen_helper_gvec_vistr_cc8,
        gen_helper_gvec_vistr_cc16,
        gen_helper_gvec_vistr_cc32,
    };

    if (es > ES_32 || m5 & ~0x1) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m5, 0, 1)) {
        gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2),
                       cpu_env, 0, g_cc[es]);
        set_cc_static(s);
    } else {
        gen_gvec_2_ool(get_field(s, v1), get_field(s, v2), 0,
                       g[es]);
    }
    return DISAS_NEXT;
}
2754
/*
 * VECTOR STRING RANGE COMPARE: four helper families selected by m6
 * bit 0 (CC requested -> _cc helpers set env->cc_op) and m6 bit 2
 * (result-type flag -> _rt helpers).  8/16/32-bit elements only.
 */
static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s, m5);
    const uint8_t m6 = get_field(s, m6);
    static gen_helper_gvec_4 * const g[3] = {
        gen_helper_gvec_vstrc8,
        gen_helper_gvec_vstrc16,
        gen_helper_gvec_vstrc32,
    };
    static gen_helper_gvec_4 * const g_rt[3] = {
        gen_helper_gvec_vstrc_rt8,
        gen_helper_gvec_vstrc_rt16,
        gen_helper_gvec_vstrc_rt32,
    };
    static gen_helper_gvec_4_ptr * const g_cc[3] = {
        gen_helper_gvec_vstrc_cc8,
        gen_helper_gvec_vstrc_cc16,
        gen_helper_gvec_vstrc_cc32,
    };
    static gen_helper_gvec_4_ptr * const g_cc_rt[3] = {
        gen_helper_gvec_vstrc_cc_rt8,
        gen_helper_gvec_vstrc_cc_rt16,
        gen_helper_gvec_vstrc_cc_rt32,
    };

    if (es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    if (extract32(m6, 0, 1)) {
        if (extract32(m6, 2, 1)) {
            gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           cpu_env, m6, g_cc_rt[es]);
        } else {
            gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           cpu_env, m6, g_cc[es]);
        }
        set_cc_static(s);
    } else {
        if (extract32(m6, 2, 1)) {
            gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           m6, g_rt[es]);
        } else {
            gen_gvec_4_ool(get_field(s, v1), get_field(s, v2),
                           get_field(s, v3), get_field(s, v4),
                           m6, g[es]);
        }
    }
    return DISAS_NEXT;
}
2809
/*
 * VECTOR STRING SEARCH: m6 bit 1 selects the zero-search (_zs) helper
 * variant; all other m6 bits are reserved.  The helpers set the CC via
 * env->cc_op.  8/16/32-bit elements only.
 */
static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
{
    typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                 TCGv_ptr, TCGv_ptr, TCGv_i32);
    static const helper_vstrs fns[3][2] = {
        { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 },
        { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 },
        { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 },
    };
    const uint8_t es = get_field(s, m5);
    const uint8_t m6 = get_field(s, m6);
    const bool zs = extract32(m6, 1, 1);

    if (es > ES_32 || m6 & ~2) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
                   get_field(s, v3), get_field(s, v4),
                   cpu_env, 0, fns[es][zs]);
    set_cc_static(s);
    return DISAS_NEXT;
}
2834
/*
 * Common expander for the element-wise FP arithmetic instructions
 * VFA (0xe3, add), VFD (0xe5, divide), VFM (0xe7, multiply) and
 * VFS (0xe2, subtract).  The long (64-bit) format is always available;
 * short and extended formats require the vector-enhancements facility.
 * m5 bits 0-2 are reserved and must be zero.
 */
static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
{
    const uint8_t fpf = get_field(s, m4);
    const uint8_t m5 = get_field(s, m5);
    gen_helper_gvec_3_ptr *fn = NULL;

    switch (s->fields.op2) {
    case 0xe3: /* VFA */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfa32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfa64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfa128;
            }
            break;
        default:
            break;
        }
        break;
    case 0xe5: /* VFD */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfd32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfd64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfd128;
            }
            break;
        default:
            break;
        }
        break;
    case 0xe7: /* VFM */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfm32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfm64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfm128;
            }
            break;
        default:
            break;
        }
        break;
    case 0xe2: /* VFS */
        switch (fpf) {
        case FPF_SHORT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfs32;
            }
            break;
        case FPF_LONG:
            fn = gen_helper_gvec_vfs64;
            break;
        case FPF_EXT:
            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
                fn = gen_helper_gvec_vfs128;
            }
            break;
        default:
            break;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* No helper found (bad format or missing facility) or reserved m5. */
    if (!fn || extract32(m5, 0, 3)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                   get_field(s, v3), cpu_env, m5, fn);
    return DISAS_NEXT;
}
2931
2932static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
2933{
2934    const uint8_t fpf = get_field(s, m3);
2935    const uint8_t m4 = get_field(s, m4);
2936    gen_helper_gvec_2_ptr *fn = NULL;
2937
2938    switch (fpf) {
2939    case FPF_SHORT:
2940        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2941            fn = gen_helper_gvec_wfk32;
2942            if (s->fields.op2 == 0xcb) {
2943                fn = gen_helper_gvec_wfc32;
2944            }
2945        }
2946        break;
2947    case FPF_LONG:
2948        fn = gen_helper_gvec_wfk64;
2949        if (s->fields.op2 == 0xcb) {
2950            fn = gen_helper_gvec_wfc64;
2951        }
2952        break;
2953    case FPF_EXT:
2954        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
2955            fn = gen_helper_gvec_wfk128;
2956            if (s->fields.op2 == 0xcb) {
2957                fn = gen_helper_gvec_wfc128;
2958            }
2959        }
2960        break;
2961    default:
2962        break;
2963    };
2964
2965    if (!fn || m4) {
2966        gen_program_exception(s, PGM_SPECIFICATION);
2967        return DISAS_NORETURN;
2968    }
2969
2970    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, 0, fn);
2971    set_cc_static(s);
2972    return DISAS_NEXT;
2973}
2974
2975static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
2976{
2977    const uint8_t fpf = get_field(s, m4);
2978    const uint8_t m5 = get_field(s, m5);
2979    const uint8_t m6 = get_field(s, m6);
2980    const bool cs = extract32(m6, 0, 1);
2981    const bool sq = extract32(m5, 2, 1);
2982    gen_helper_gvec_3_ptr *fn = NULL;
2983
2984    switch (s->fields.op2) {
2985    case 0xe8:
2986        switch (fpf) {
2987        case FPF_SHORT:
2988            fn = cs ? gen_helper_gvec_vfce32_cc : gen_helper_gvec_vfce32;
2989            break;
2990        case FPF_LONG:
2991            fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64;
2992            break;
2993        case FPF_EXT:
2994            fn = cs ? gen_helper_gvec_vfce128_cc : gen_helper_gvec_vfce128;
2995            break;
2996        default:
2997            break;
2998        }
2999        break;
3000    case 0xeb:
3001        switch (fpf) {
3002        case FPF_SHORT:
3003            fn = cs ? gen_helper_gvec_vfch32_cc : gen_helper_gvec_vfch32;
3004            break;
3005        case FPF_LONG:
3006            fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64;
3007            break;
3008        case FPF_EXT:
3009            fn = cs ? gen_helper_gvec_vfch128_cc : gen_helper_gvec_vfch128;
3010            break;
3011        default:
3012            break;
3013        }
3014        break;
3015    case 0xea:
3016        switch (fpf) {
3017        case FPF_SHORT:
3018            fn = cs ? gen_helper_gvec_vfche32_cc : gen_helper_gvec_vfche32;
3019            break;
3020        case FPF_LONG:
3021            fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64;
3022            break;
3023        case FPF_EXT:
3024            fn = cs ? gen_helper_gvec_vfche128_cc : gen_helper_gvec_vfche128;
3025            break;
3026        default:
3027            break;
3028        }
3029        break;
3030    default:
3031        g_assert_not_reached();
3032    }
3033
3034    if (!fn || extract32(m5, 0, 2) || extract32(m6, 1, 3) ||
3035        (!s390_has_feat(S390_FEAT_VECTOR_ENH) && (fpf != FPF_LONG || sq))) {
3036        gen_program_exception(s, PGM_SPECIFICATION);
3037        return DISAS_NORETURN;
3038    }
3039
3040    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
3041                   cpu_env, m5, fn);
3042    if (cs) {
3043        set_cc_static(s);
3044    }
3045    return DISAS_NEXT;
3046}
3047
3048static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
3049{
3050    const uint8_t fpf = get_field(s, m3);
3051    const uint8_t m4 = get_field(s, m4);
3052    const uint8_t erm = get_field(s, m5);
3053    gen_helper_gvec_2_ptr *fn = NULL;
3054
3055
3056    switch (s->fields.op2) {
3057    case 0xc3:
3058        switch (fpf) {
3059        case FPF_LONG:
3060            fn = gen_helper_gvec_vcdg64;
3061            break;
3062        case FPF_SHORT:
3063            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3064                fn = gen_helper_gvec_vcdg32;
3065            }
3066            break;
3067        default:
3068            break;
3069        }
3070        break;
3071    case 0xc1:
3072        switch (fpf) {
3073        case FPF_LONG:
3074            fn = gen_helper_gvec_vcdlg64;
3075            break;
3076        case FPF_SHORT:
3077            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3078                fn = gen_helper_gvec_vcdlg32;
3079            }
3080            break;
3081        default:
3082            break;
3083        }
3084        break;
3085    case 0xc2:
3086        switch (fpf) {
3087        case FPF_LONG:
3088            fn = gen_helper_gvec_vcgd64;
3089            break;
3090        case FPF_SHORT:
3091            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3092                fn = gen_helper_gvec_vcgd32;
3093            }
3094            break;
3095        default:
3096            break;
3097        }
3098        break;
3099    case 0xc0:
3100        switch (fpf) {
3101        case FPF_LONG:
3102            fn = gen_helper_gvec_vclgd64;
3103            break;
3104        case FPF_SHORT:
3105            if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
3106                fn = gen_helper_gvec_vclgd32;
3107            }
3108            break;
3109        default:
3110            break;
3111        }
3112        break;
3113    case 0xc7:
3114        switch (fpf) {
3115        case FPF_SHORT:
3116            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3117                fn = gen_helper_gvec_vfi32;
3118            }
3119            break;
3120        case FPF_LONG:
3121            fn = gen_helper_gvec_vfi64;
3122            break;
3123        case FPF_EXT:
3124            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3125                fn = gen_helper_gvec_vfi128;
3126            }
3127            break;
3128        default:
3129            break;
3130        }
3131        break;
3132    case 0xc5:
3133        switch (fpf) {
3134        case FPF_LONG:
3135            fn = gen_helper_gvec_vflr64;
3136            break;
3137        case FPF_EXT:
3138            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3139                fn = gen_helper_gvec_vflr128;
3140            }
3141            break;
3142        default:
3143            break;
3144        }
3145        break;
3146    default:
3147        g_assert_not_reached();
3148    }
3149
3150    if (!fn || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
3151        gen_program_exception(s, PGM_SPECIFICATION);
3152        return DISAS_NORETURN;
3153    }
3154
3155    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
3156                   deposit32(m4, 4, 4, erm), fn);
3157    return DISAS_NEXT;
3158}
3159
3160static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
3161{
3162    const uint8_t fpf = get_field(s, m3);
3163    const uint8_t m4 = get_field(s, m4);
3164    gen_helper_gvec_2_ptr *fn = NULL;
3165
3166    switch (fpf) {
3167    case FPF_SHORT:
3168        fn = gen_helper_gvec_vfll32;
3169        break;
3170    case FPF_LONG:
3171        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3172            fn = gen_helper_gvec_vfll64;
3173        }
3174        break;
3175    default:
3176        break;
3177    }
3178
3179    if (!fn || extract32(m4, 0, 3)) {
3180        gen_program_exception(s, PGM_SPECIFICATION);
3181        return DISAS_NORETURN;
3182    }
3183
3184    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
3185    return DISAS_NEXT;
3186}
3187
3188static DisasJumpType op_vfmax(DisasContext *s, DisasOps *o)
3189{
3190    const uint8_t fpf = get_field(s, m4);
3191    const uint8_t m6 = get_field(s, m6);
3192    const uint8_t m5 = get_field(s, m5);
3193    gen_helper_gvec_3_ptr *fn;
3194
3195    if (m6 == 5 || m6 == 6 || m6 == 7 || m6 > 13) {
3196        gen_program_exception(s, PGM_SPECIFICATION);
3197        return DISAS_NORETURN;
3198    }
3199
3200    switch (fpf) {
3201    case FPF_SHORT:
3202        if (s->fields.op2 == 0xef) {
3203            fn = gen_helper_gvec_vfmax32;
3204        } else {
3205            fn = gen_helper_gvec_vfmin32;
3206        }
3207        break;
3208    case FPF_LONG:
3209        if (s->fields.op2 == 0xef) {
3210            fn = gen_helper_gvec_vfmax64;
3211        } else {
3212            fn = gen_helper_gvec_vfmin64;
3213        }
3214        break;
3215    case FPF_EXT:
3216        if (s->fields.op2 == 0xef) {
3217            fn = gen_helper_gvec_vfmax128;
3218        } else {
3219            fn = gen_helper_gvec_vfmin128;
3220        }
3221        break;
3222    default:
3223        gen_program_exception(s, PGM_SPECIFICATION);
3224        return DISAS_NORETURN;
3225    }
3226
3227    gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3),
3228                   cpu_env, deposit32(m5, 4, 4, m6), fn);
3229    return DISAS_NEXT;
3230}
3231
3232static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
3233{
3234    const uint8_t m5 = get_field(s, m5);
3235    const uint8_t fpf = get_field(s, m6);
3236    gen_helper_gvec_4_ptr *fn = NULL;
3237
3238    switch (s->fields.op2) {
3239    case 0x8f:
3240        switch (fpf) {
3241        case FPF_SHORT:
3242            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3243                fn = gen_helper_gvec_vfma32;
3244            }
3245            break;
3246        case FPF_LONG:
3247            fn = gen_helper_gvec_vfma64;
3248            break;
3249        case FPF_EXT:
3250            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3251                fn = gen_helper_gvec_vfma128;
3252            }
3253            break;
3254        default:
3255            break;
3256        }
3257        break;
3258    case 0x8e:
3259        switch (fpf) {
3260        case FPF_SHORT:
3261            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3262                fn = gen_helper_gvec_vfms32;
3263            }
3264            break;
3265        case FPF_LONG:
3266            fn = gen_helper_gvec_vfms64;
3267            break;
3268        case FPF_EXT:
3269            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3270                fn = gen_helper_gvec_vfms128;
3271            }
3272            break;
3273        default:
3274            break;
3275        }
3276        break;
3277    case 0x9f:
3278        switch (fpf) {
3279        case FPF_SHORT:
3280            fn = gen_helper_gvec_vfnma32;
3281            break;
3282        case FPF_LONG:
3283            fn = gen_helper_gvec_vfnma64;
3284            break;
3285        case FPF_EXT:
3286            fn = gen_helper_gvec_vfnma128;
3287            break;
3288        default:
3289            break;
3290        }
3291        break;
3292    case 0x9e:
3293        switch (fpf) {
3294        case FPF_SHORT:
3295            fn = gen_helper_gvec_vfnms32;
3296            break;
3297        case FPF_LONG:
3298            fn = gen_helper_gvec_vfnms64;
3299            break;
3300        case FPF_EXT:
3301            fn = gen_helper_gvec_vfnms128;
3302            break;
3303        default:
3304            break;
3305        }
3306        break;
3307    default:
3308        g_assert_not_reached();
3309    }
3310
3311    if (!fn || extract32(m5, 0, 3)) {
3312        gen_program_exception(s, PGM_SPECIFICATION);
3313        return DISAS_NORETURN;
3314    }
3315
3316    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
3317                   get_field(s, v3), get_field(s, v4), cpu_env, m5, fn);
3318    return DISAS_NEXT;
3319}
3320
3321static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
3322{
3323    const uint8_t v1 = get_field(s, v1);
3324    const uint8_t v2 = get_field(s, v2);
3325    const uint8_t fpf = get_field(s, m3);
3326    const uint8_t m4 = get_field(s, m4);
3327    const uint8_t m5 = get_field(s, m5);
3328    const bool se = extract32(m4, 3, 1);
3329    TCGv_i64 tmp;
3330
3331    if ((fpf != FPF_LONG && !s390_has_feat(S390_FEAT_VECTOR_ENH)) ||
3332        extract32(m4, 0, 3) || m5 > 2) {
3333        gen_program_exception(s, PGM_SPECIFICATION);
3334        return DISAS_NORETURN;
3335    }
3336
3337    switch (fpf) {
3338    case FPF_SHORT:
3339        if (!se) {
3340            switch (m5) {
3341            case 0:
3342                /* sign bit is inverted (complement) */
3343                gen_gvec_fn_2i(xori, ES_32, v1, v2, 1ull << 31);
3344                break;
3345            case 1:
3346                /* sign bit is set to one (negative) */
3347                gen_gvec_fn_2i(ori, ES_32, v1, v2, 1ull << 31);
3348                break;
3349            case 2:
3350                /* sign bit is set to zero (positive) */
3351                gen_gvec_fn_2i(andi, ES_32, v1, v2, (1ull << 31) - 1);
3352                break;
3353            }
3354            return DISAS_NEXT;
3355        }
3356        break;
3357    case FPF_LONG:
3358        if (!se) {
3359            switch (m5) {
3360            case 0:
3361                /* sign bit is inverted (complement) */
3362                gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
3363                break;
3364            case 1:
3365                /* sign bit is set to one (negative) */
3366                gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
3367                break;
3368            case 2:
3369                /* sign bit is set to zero (positive) */
3370                gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
3371                break;
3372            }
3373            return DISAS_NEXT;
3374        }
3375        break;
3376    case FPF_EXT:
3377        /* Only a single element. */
3378        break;
3379    default:
3380        gen_program_exception(s, PGM_SPECIFICATION);
3381        return DISAS_NORETURN;
3382    }
3383
3384    /* With a single element, we are only interested in bit 0. */
3385    tmp = tcg_temp_new_i64();
3386    read_vec_element_i64(tmp, v2, 0, ES_64);
3387    switch (m5) {
3388    case 0:
3389        /* sign bit is inverted (complement) */
3390        tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
3391        break;
3392    case 1:
3393        /* sign bit is set to one (negative) */
3394        tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
3395        break;
3396    case 2:
3397        /* sign bit is set to zero (positive) */
3398        tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
3399        break;
3400    }
3401    write_vec_element_i64(tmp, v1, 0, ES_64);
3402
3403    if (fpf == FPF_EXT) {
3404        read_vec_element_i64(tmp, v2, 1, ES_64);
3405        write_vec_element_i64(tmp, v1, 1, ES_64);
3406    }
3407
3408    tcg_temp_free_i64(tmp);
3409
3410    return DISAS_NEXT;
3411}
3412
3413static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
3414{
3415    const uint8_t fpf = get_field(s, m3);
3416    const uint8_t m4 = get_field(s, m4);
3417    gen_helper_gvec_2_ptr *fn = NULL;
3418
3419    switch (fpf) {
3420    case FPF_SHORT:
3421        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3422            fn = gen_helper_gvec_vfsq32;
3423        }
3424        break;
3425    case FPF_LONG:
3426        fn = gen_helper_gvec_vfsq64;
3427        break;
3428    case FPF_EXT:
3429        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3430            fn = gen_helper_gvec_vfsq128;
3431        }
3432        break;
3433    default:
3434        break;
3435    }
3436
3437    if (!fn || extract32(m4, 0, 3)) {
3438        gen_program_exception(s, PGM_SPECIFICATION);
3439        return DISAS_NORETURN;
3440    }
3441
3442    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
3443    return DISAS_NEXT;
3444}
3445
3446static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
3447{
3448    const uint16_t i3 = get_field(s, i3);
3449    const uint8_t fpf = get_field(s, m4);
3450    const uint8_t m5 = get_field(s, m5);
3451    gen_helper_gvec_2_ptr *fn = NULL;
3452
3453    switch (fpf) {
3454    case FPF_SHORT:
3455        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3456            fn = gen_helper_gvec_vftci32;
3457        }
3458        break;
3459    case FPF_LONG:
3460        fn = gen_helper_gvec_vftci64;
3461        break;
3462    case FPF_EXT:
3463        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
3464            fn = gen_helper_gvec_vftci128;
3465        }
3466        break;
3467    default:
3468        break;
3469    }
3470
3471    if (!fn || extract32(m5, 0, 3)) {
3472        gen_program_exception(s, PGM_SPECIFICATION);
3473        return DISAS_NORETURN;
3474    }
3475
3476    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
3477                   deposit32(m5, 4, 12, i3), fn);
3478    set_cc_static(s);
3479    return DISAS_NEXT;
3480}
3481