xref: /qemu/target/s390x/tcg/vec_fpu_helper.c (revision 1fe8ac35)
1 /*
2  * QEMU TCG support -- s390x vector floating point instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "s390x-internal.h"
15 #include "vec.h"
16 #include "tcg_s390x.h"
17 #include "tcg/tcg-gvec-desc.h"
18 #include "exec/exec-all.h"
19 #include "exec/helper-proto.h"
20 #include "fpu/softfloat.h"
21 
/*
 * Exception codes that check_ieee_exc() combines with the element number
 * (as "enr << 4 | code") to form the value it returns.
 */
#define VIC_INVALID         0x01    /* invalid operation */
#define VIC_DIVBYZERO       0x02    /* division by zero */
#define VIC_OVERFLOW        0x03    /* overflow */
#define VIC_UNDERFLOW       0x04    /* underflow */
#define VIC_INEXACT         0x05    /* inexact */
27 
28 /* returns the VEX. If the VEX is 0, there is no trap */
29 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
30                               uint8_t *vec_exc)
31 {
32     uint8_t vece_exc = 0, trap_exc;
33     unsigned qemu_exc;
34 
35     /* Retrieve and clear the softfloat exceptions */
36     qemu_exc = env->fpu_status.float_exception_flags;
37     if (qemu_exc == 0) {
38         return 0;
39     }
40     env->fpu_status.float_exception_flags = 0;
41 
42     vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
43 
44     /* Add them to the vector-wide s390x exception bits */
45     *vec_exc |= vece_exc;
46 
47     /* Check for traps and construct the VXC */
48     trap_exc = vece_exc & env->fpc >> 24;
49     if (trap_exc) {
50         if (trap_exc & S390_IEEE_MASK_INVALID) {
51             return enr << 4 | VIC_INVALID;
52         } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
53             return enr << 4 | VIC_DIVBYZERO;
54         } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
55             return enr << 4 | VIC_OVERFLOW;
56         } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
57             return enr << 4 | VIC_UNDERFLOW;
58         } else if (!XxC) {
59             g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
60             /* inexact has lowest priority on traps */
61             return enr << 4 | VIC_INEXACT;
62         }
63     }
64     return 0;
65 }
66 
67 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
68                             uintptr_t retaddr)
69 {
70     if (vxc) {
71         /* on traps, the fpc flags are not updated, instruction is suppressed */
72         tcg_s390_vector_exception(env, vxc, retaddr);
73     }
74     if (vec_exc) {
75         /* indicate exceptions for all elements combined */
76         env->fpc |= vec_exc << 16;
77     }
78 }
79 
80 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
81 {
82     return make_float32(s390_vec_read_element32(v, enr));
83 }
84 
85 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
86 {
87     return make_float64(s390_vec_read_element64(v, enr));
88 }
89 
90 static float128 s390_vec_read_float128(const S390Vector *v)
91 {
92     return make_float128(s390_vec_read_element64(v, 0),
93                          s390_vec_read_element64(v, 1));
94 }
95 
96 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
97 {
98     return s390_vec_write_element32(v, enr, data);
99 }
100 
101 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
102 {
103     return s390_vec_write_element64(v, enr, data);
104 }
105 
106 static void s390_vec_write_float128(S390Vector *v, float128 data)
107 {
108     s390_vec_write_element64(v, 0, data.high);
109     s390_vec_write_element64(v, 1, data.low);
110 }
111 
112 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
113 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
114                     bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
115                     uintptr_t retaddr)
116 {
117     uint8_t vxc, vec_exc = 0;
118     S390Vector tmp = {};
119     int i, old_mode;
120 
121     old_mode = s390_swap_bfp_rounding_mode(env, erm);
122     for (i = 0; i < 4; i++) {
123         const float32 a = s390_vec_read_float32(v2, i);
124 
125         s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
126         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
127         if (s || vxc) {
128             break;
129         }
130     }
131     s390_restore_bfp_rounding_mode(env, old_mode);
132     handle_ieee_exc(env, vxc, vec_exc, retaddr);
133     *v1 = tmp;
134 }
135 
136 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
137 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
138                     bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
139                     uintptr_t retaddr)
140 {
141     uint8_t vxc, vec_exc = 0;
142     S390Vector tmp = {};
143     int i, old_mode;
144 
145     old_mode = s390_swap_bfp_rounding_mode(env, erm);
146     for (i = 0; i < 2; i++) {
147         const float64 a = s390_vec_read_float64(v2, i);
148 
149         s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
150         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
151         if (s || vxc) {
152             break;
153         }
154     }
155     s390_restore_bfp_rounding_mode(env, old_mode);
156     handle_ieee_exc(env, vxc, vec_exc, retaddr);
157     *v1 = tmp;
158 }
159 
160 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
161 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
162                     bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
163                     uintptr_t retaddr)
164 {
165     const float128 a = s390_vec_read_float128(v2);
166     uint8_t vxc, vec_exc = 0;
167     S390Vector tmp = {};
168     int old_mode;
169 
170     old_mode = s390_swap_bfp_rounding_mode(env, erm);
171     s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
172     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
173     s390_restore_bfp_rounding_mode(env, old_mode);
174     handle_ieee_exc(env, vxc, vec_exc, retaddr);
175     *v1 = tmp;
176 }
177 
178 static float32 vcdg32(float32 a, float_status *s)
179 {
180     return int32_to_float32(a, s);
181 }
182 
183 static float32 vcdlg32(float32 a, float_status *s)
184 {
185     return uint32_to_float32(a, s);
186 }
187 
188 static float32 vcgd32(float32 a, float_status *s)
189 {
190     const float32 tmp = float32_to_int32(a, s);
191 
192     return float32_is_any_nan(a) ? INT32_MIN : tmp;
193 }
194 
195 static float32 vclgd32(float32 a, float_status *s)
196 {
197     const float32 tmp = float32_to_uint32(a, s);
198 
199     return float32_is_any_nan(a) ? 0 : tmp;
200 }
201 
202 static float64 vcdg64(float64 a, float_status *s)
203 {
204     return int64_to_float64(a, s);
205 }
206 
207 static float64 vcdlg64(float64 a, float_status *s)
208 {
209     return uint64_to_float64(a, s);
210 }
211 
212 static float64 vcgd64(float64 a, float_status *s)
213 {
214     const float64 tmp = float64_to_int64(a, s);
215 
216     return float64_is_any_nan(a) ? INT64_MIN : tmp;
217 }
218 
219 static float64 vclgd64(float64 a, float_status *s)
220 {
221     const float64 tmp = float64_to_uint64(a, s);
222 
223     return float64_is_any_nan(a) ? 0 : tmp;
224 }
225 
226 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS)                                       \
227 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env,   \
228                                uint32_t desc)                                  \
229 {                                                                              \
230     const uint8_t erm = extract32(simd_data(desc), 4, 4);                      \
231     const bool se = extract32(simd_data(desc), 3, 1);                          \
232     const bool XxC = extract32(simd_data(desc), 2, 1);                         \
233                                                                                \
234     vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC());                     \
235 }
236 
237 #define DEF_GVEC_VOP2_32(NAME)                                                 \
238 DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
239 
240 #define DEF_GVEC_VOP2_64(NAME)                                                 \
241 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
242 
243 #define DEF_GVEC_VOP2(NAME, OP)                                                \
244 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)                                       \
245 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)                                       \
246 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
247 
248 DEF_GVEC_VOP2_32(vcdg)
249 DEF_GVEC_VOP2_32(vcdlg)
250 DEF_GVEC_VOP2_32(vcgd)
251 DEF_GVEC_VOP2_32(vclgd)
252 DEF_GVEC_VOP2_64(vcdg)
253 DEF_GVEC_VOP2_64(vcdlg)
254 DEF_GVEC_VOP2_64(vcgd)
255 DEF_GVEC_VOP2_64(vclgd)
256 DEF_GVEC_VOP2(vfi, round_to_int)
257 DEF_GVEC_VOP2(vfsq, sqrt)
258 
259 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
260 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
261                     CPUS390XState *env, bool s, vop32_3_fn fn,
262                     uintptr_t retaddr)
263 {
264     uint8_t vxc, vec_exc = 0;
265     S390Vector tmp = {};
266     int i;
267 
268     for (i = 0; i < 4; i++) {
269         const float32 a = s390_vec_read_float32(v2, i);
270         const float32 b = s390_vec_read_float32(v3, i);
271 
272         s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
273         vxc = check_ieee_exc(env, i, false, &vec_exc);
274         if (s || vxc) {
275             break;
276         }
277     }
278     handle_ieee_exc(env, vxc, vec_exc, retaddr);
279     *v1 = tmp;
280 }
281 
282 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
283 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
284                     CPUS390XState *env, bool s, vop64_3_fn fn,
285                     uintptr_t retaddr)
286 {
287     uint8_t vxc, vec_exc = 0;
288     S390Vector tmp = {};
289     int i;
290 
291     for (i = 0; i < 2; i++) {
292         const float64 a = s390_vec_read_float64(v2, i);
293         const float64 b = s390_vec_read_float64(v3, i);
294 
295         s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
296         vxc = check_ieee_exc(env, i, false, &vec_exc);
297         if (s || vxc) {
298             break;
299         }
300     }
301     handle_ieee_exc(env, vxc, vec_exc, retaddr);
302     *v1 = tmp;
303 }
304 
305 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
306 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
307                      CPUS390XState *env, bool s, vop128_3_fn fn,
308                      uintptr_t retaddr)
309 {
310     const float128 a = s390_vec_read_float128(v2);
311     const float128 b = s390_vec_read_float128(v3);
312     uint8_t vxc, vec_exc = 0;
313     S390Vector tmp = {};
314 
315     s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
316     vxc = check_ieee_exc(env, 0, false, &vec_exc);
317     handle_ieee_exc(env, vxc, vec_exc, retaddr);
318     *v1 = tmp;
319 }
320 
321 #define DEF_GVEC_VOP3_B(NAME, OP, BITS)                                        \
322 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
323                               CPUS390XState *env, uint32_t desc)               \
324 {                                                                              \
325     const bool se = extract32(simd_data(desc), 3, 1);                          \
326                                                                                \
327     vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC());           \
328 }
329 
330 #define DEF_GVEC_VOP3(NAME, OP)                                                \
331 DEF_GVEC_VOP3_B(NAME, OP, 32)                                                  \
332 DEF_GVEC_VOP3_B(NAME, OP, 64)                                                  \
333 DEF_GVEC_VOP3_B(NAME, OP, 128)
334 
335 DEF_GVEC_VOP3(vfa, add)
336 DEF_GVEC_VOP3(vfs, sub)
337 DEF_GVEC_VOP3(vfd, div)
338 DEF_GVEC_VOP3(vfm, mul)
339 
340 static int wfc32(const S390Vector *v1, const S390Vector *v2,
341                  CPUS390XState *env, bool signal, uintptr_t retaddr)
342 {
343     /* only the zero-indexed elements are compared */
344     const float32 a = s390_vec_read_float32(v1, 0);
345     const float32 b = s390_vec_read_float32(v2, 0);
346     uint8_t vxc, vec_exc = 0;
347     int cmp;
348 
349     if (signal) {
350         cmp = float32_compare(a, b, &env->fpu_status);
351     } else {
352         cmp = float32_compare_quiet(a, b, &env->fpu_status);
353     }
354     vxc = check_ieee_exc(env, 0, false, &vec_exc);
355     handle_ieee_exc(env, vxc, vec_exc, retaddr);
356 
357     return float_comp_to_cc(env, cmp);
358 }
359 
360 static int wfc64(const S390Vector *v1, const S390Vector *v2,
361                  CPUS390XState *env, bool signal, uintptr_t retaddr)
362 {
363     /* only the zero-indexed elements are compared */
364     const float64 a = s390_vec_read_float64(v1, 0);
365     const float64 b = s390_vec_read_float64(v2, 0);
366     uint8_t vxc, vec_exc = 0;
367     int cmp;
368 
369     if (signal) {
370         cmp = float64_compare(a, b, &env->fpu_status);
371     } else {
372         cmp = float64_compare_quiet(a, b, &env->fpu_status);
373     }
374     vxc = check_ieee_exc(env, 0, false, &vec_exc);
375     handle_ieee_exc(env, vxc, vec_exc, retaddr);
376 
377     return float_comp_to_cc(env, cmp);
378 }
379 
380 static int wfc128(const S390Vector *v1, const S390Vector *v2,
381                   CPUS390XState *env, bool signal, uintptr_t retaddr)
382 {
383     /* only the zero-indexed elements are compared */
384     const float128 a = s390_vec_read_float128(v1);
385     const float128 b = s390_vec_read_float128(v2);
386     uint8_t vxc, vec_exc = 0;
387     int cmp;
388 
389     if (signal) {
390         cmp = float128_compare(a, b, &env->fpu_status);
391     } else {
392         cmp = float128_compare_quiet(a, b, &env->fpu_status);
393     }
394     vxc = check_ieee_exc(env, 0, false, &vec_exc);
395     handle_ieee_exc(env, vxc, vec_exc, retaddr);
396 
397     return float_comp_to_cc(env, cmp);
398 }
399 
400 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS)                                     \
401 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2,                 \
402                                CPUS390XState *env, uint32_t desc)              \
403 {                                                                              \
404     env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC());                      \
405 }
406 
407 #define DEF_GVEC_WFC(NAME, SIGNAL)                                             \
408      DEF_GVEC_WFC_B(NAME, SIGNAL, 32)                                          \
409      DEF_GVEC_WFC_B(NAME, SIGNAL, 64)                                          \
410      DEF_GVEC_WFC_B(NAME, SIGNAL, 128)
411 
412 DEF_GVEC_WFC(wfc, false)
413 DEF_GVEC_WFC(wfk, true)
414 
415 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
416 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
417                  CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
418 {
419     uint8_t vxc, vec_exc = 0;
420     S390Vector tmp = {};
421     int match = 0;
422     int i;
423 
424     for (i = 0; i < 4; i++) {
425         const float32 a = s390_vec_read_float32(v2, i);
426         const float32 b = s390_vec_read_float32(v3, i);
427 
428         /* swap the order of the parameters, so we can use existing functions */
429         if (fn(b, a, &env->fpu_status)) {
430             match++;
431             s390_vec_write_element32(&tmp, i, -1u);
432         }
433         vxc = check_ieee_exc(env, i, false, &vec_exc);
434         if (s || vxc) {
435             break;
436         }
437     }
438 
439     handle_ieee_exc(env, vxc, vec_exc, retaddr);
440     *v1 = tmp;
441     if (match) {
442         return s || match == 4 ? 0 : 1;
443     }
444     return 3;
445 }
446 
447 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
448 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
449                  CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
450 {
451     uint8_t vxc, vec_exc = 0;
452     S390Vector tmp = {};
453     int match = 0;
454     int i;
455 
456     for (i = 0; i < 2; i++) {
457         const float64 a = s390_vec_read_float64(v2, i);
458         const float64 b = s390_vec_read_float64(v3, i);
459 
460         /* swap the order of the parameters, so we can use existing functions */
461         if (fn(b, a, &env->fpu_status)) {
462             match++;
463             s390_vec_write_element64(&tmp, i, -1ull);
464         }
465         vxc = check_ieee_exc(env, i, false, &vec_exc);
466         if (s || vxc) {
467             break;
468         }
469     }
470 
471     handle_ieee_exc(env, vxc, vec_exc, retaddr);
472     *v1 = tmp;
473     if (match) {
474         return s || match == 2 ? 0 : 1;
475     }
476     return 3;
477 }
478 
479 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
480 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
481                  CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
482 {
483     const float128 a = s390_vec_read_float128(v2);
484     const float128 b = s390_vec_read_float128(v3);
485     uint8_t vxc, vec_exc = 0;
486     S390Vector tmp = {};
487     bool match = false;
488 
489     /* swap the order of the parameters, so we can use existing functions */
490     if (fn(b, a, &env->fpu_status)) {
491         match = true;
492         s390_vec_write_element64(&tmp, 0, -1ull);
493         s390_vec_write_element64(&tmp, 1, -1ull);
494     }
495     vxc = check_ieee_exc(env, 0, false, &vec_exc);
496     handle_ieee_exc(env, vxc, vec_exc, retaddr);
497     *v1 = tmp;
498     return match ? 0 : 3;
499 }
500 
501 #define DEF_GVEC_VFC_B(NAME, OP, BITS)                                         \
502 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
503                                CPUS390XState *env, uint32_t desc)              \
504 {                                                                              \
505     const bool se = extract32(simd_data(desc), 3, 1);                          \
506     const bool sq = extract32(simd_data(desc), 2, 1);                          \
507     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
508                                                                                \
509     vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                               \
510 }                                                                              \
511                                                                                \
512 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3,  \
513                                     CPUS390XState *env, uint32_t desc)         \
514 {                                                                              \
515     const bool se = extract32(simd_data(desc), 3, 1);                          \
516     const bool sq = extract32(simd_data(desc), 2, 1);                          \
517     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
518                                                                                \
519     env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                  \
520 }
521 
522 #define DEF_GVEC_VFC(NAME, OP)                                                 \
523 DEF_GVEC_VFC_B(NAME, OP, 32)                                                   \
524 DEF_GVEC_VFC_B(NAME, OP, 64)                                                   \
525 DEF_GVEC_VFC_B(NAME, OP, 128)                                                  \
526 
527 DEF_GVEC_VFC(vfce, eq)
528 DEF_GVEC_VFC(vfch, lt)
529 DEF_GVEC_VFC(vfche, le)
530 
531 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
532                          uint32_t desc)
533 {
534     const bool s = extract32(simd_data(desc), 3, 1);
535     uint8_t vxc, vec_exc = 0;
536     S390Vector tmp = {};
537     int i;
538 
539     for (i = 0; i < 2; i++) {
540         /* load from even element */
541         const float32 a = s390_vec_read_element32(v2, i * 2);
542         const uint64_t ret = float32_to_float64(a, &env->fpu_status);
543 
544         s390_vec_write_element64(&tmp, i, ret);
545         /* indicate the source element */
546         vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
547         if (s || vxc) {
548             break;
549         }
550     }
551     handle_ieee_exc(env, vxc, vec_exc, GETPC());
552     *(S390Vector *)v1 = tmp;
553 }
554 
555 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
556                          uint32_t desc)
557 {
558     /* load from even element */
559     const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
560                                              &env->fpu_status);
561     uint8_t vxc, vec_exc = 0;
562 
563     vxc = check_ieee_exc(env, 0, false, &vec_exc);
564     handle_ieee_exc(env, vxc, vec_exc, GETPC());
565     s390_vec_write_float128(v1, ret);
566 }
567 
568 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
569                          uint32_t desc)
570 {
571     const uint8_t erm = extract32(simd_data(desc), 4, 4);
572     const bool s = extract32(simd_data(desc), 3, 1);
573     const bool XxC = extract32(simd_data(desc), 2, 1);
574     uint8_t vxc, vec_exc = 0;
575     S390Vector tmp = {};
576     int i, old_mode;
577 
578     old_mode = s390_swap_bfp_rounding_mode(env, erm);
579     for (i = 0; i < 2; i++) {
580         float64 a = s390_vec_read_element64(v2, i);
581         uint32_t ret = float64_to_float32(a, &env->fpu_status);
582 
583         /* place at even element */
584         s390_vec_write_element32(&tmp, i * 2, ret);
585         /* indicate the source element */
586         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
587         if (s || vxc) {
588             break;
589         }
590     }
591     s390_restore_bfp_rounding_mode(env, old_mode);
592     handle_ieee_exc(env, vxc, vec_exc, GETPC());
593     *(S390Vector *)v1 = tmp;
594 }
595 
596 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
597                           uint32_t desc)
598 {
599     const uint8_t erm = extract32(simd_data(desc), 4, 4);
600     const bool XxC = extract32(simd_data(desc), 2, 1);
601     uint8_t vxc, vec_exc = 0;
602     int old_mode;
603     float64 ret;
604 
605     old_mode = s390_swap_bfp_rounding_mode(env, erm);
606     ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
607     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
608     s390_restore_bfp_rounding_mode(env, old_mode);
609     handle_ieee_exc(env, vxc, vec_exc, GETPC());
610 
611     /* place at even element, odd element is unpredictable */
612     s390_vec_write_float64(v1, 0, ret);
613 }
614 
615 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
616                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
617                    uintptr_t retaddr)
618 {
619     uint8_t vxc, vec_exc = 0;
620     S390Vector tmp = {};
621     int i;
622 
623     for (i = 0; i < 4; i++) {
624         const float32 a = s390_vec_read_float32(v2, i);
625         const float32 b = s390_vec_read_float32(v3, i);
626         const float32 c = s390_vec_read_float32(v4, i);
627         float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status);
628 
629         s390_vec_write_float32(&tmp, i, ret);
630         vxc = check_ieee_exc(env, i, false, &vec_exc);
631         if (s || vxc) {
632             break;
633         }
634     }
635     handle_ieee_exc(env, vxc, vec_exc, retaddr);
636     *v1 = tmp;
637 }
638 
639 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
640                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
641                    uintptr_t retaddr)
642 {
643     uint8_t vxc, vec_exc = 0;
644     S390Vector tmp = {};
645     int i;
646 
647     for (i = 0; i < 2; i++) {
648         const float64 a = s390_vec_read_float64(v2, i);
649         const float64 b = s390_vec_read_float64(v3, i);
650         const float64 c = s390_vec_read_float64(v4, i);
651         const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
652 
653         s390_vec_write_float64(&tmp, i, ret);
654         vxc = check_ieee_exc(env, i, false, &vec_exc);
655         if (s || vxc) {
656             break;
657         }
658     }
659     handle_ieee_exc(env, vxc, vec_exc, retaddr);
660     *v1 = tmp;
661 }
662 
663 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
664                     const S390Vector *v4, CPUS390XState *env, bool s, int flags,
665                     uintptr_t retaddr)
666 {
667     const float128 a = s390_vec_read_float128(v2);
668     const float128 b = s390_vec_read_float128(v3);
669     const float128 c = s390_vec_read_float128(v4);
670     uint8_t vxc, vec_exc = 0;
671     float128 ret;
672 
673     ret = float128_muladd(a, b, c, flags, &env->fpu_status);
674     vxc = check_ieee_exc(env, 0, false, &vec_exc);
675     handle_ieee_exc(env, vxc, vec_exc, retaddr);
676     s390_vec_write_float128(v1, ret);
677 }
678 
679 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS)                                     \
680 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
681                                const void *v4, CPUS390XState *env,             \
682                                uint32_t desc)                                  \
683 {                                                                              \
684     const bool se = extract32(simd_data(desc), 3, 1);                          \
685                                                                                \
686     vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC());                       \
687 }
688 
689 #define DEF_GVEC_VFMA(NAME, FLAGS)                                             \
690     DEF_GVEC_VFMA_B(NAME, FLAGS, 32)                                           \
691     DEF_GVEC_VFMA_B(NAME, FLAGS, 64)                                           \
692     DEF_GVEC_VFMA_B(NAME, FLAGS, 128)
693 
694 DEF_GVEC_VFMA(vfma, 0)
695 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
696 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
697 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
698 
699 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
700                           uint32_t desc)
701 {
702     uint16_t i3 = extract32(simd_data(desc), 4, 12);
703     bool s = extract32(simd_data(desc), 3, 1);
704     int i, match = 0;
705 
706     for (i = 0; i < 4; i++) {
707         float32 a = s390_vec_read_float32(v2, i);
708 
709         if (float32_dcmask(env, a) & i3) {
710             match++;
711             s390_vec_write_element32(v1, i, -1u);
712         } else {
713             s390_vec_write_element32(v1, i, 0);
714         }
715         if (s) {
716             break;
717         }
718     }
719 
720     if (match == 4 || (s && match)) {
721         env->cc_op = 0;
722     } else if (match) {
723         env->cc_op = 1;
724     } else {
725         env->cc_op = 3;
726     }
727 }
728 
729 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
730                           uint32_t desc)
731 {
732     const uint16_t i3 = extract32(simd_data(desc), 4, 12);
733     const bool s = extract32(simd_data(desc), 3, 1);
734     int i, match = 0;
735 
736     for (i = 0; i < 2; i++) {
737         const float64 a = s390_vec_read_float64(v2, i);
738 
739         if (float64_dcmask(env, a) & i3) {
740             match++;
741             s390_vec_write_element64(v1, i, -1ull);
742         } else {
743             s390_vec_write_element64(v1, i, 0);
744         }
745         if (s) {
746             break;
747         }
748     }
749 
750     if (match == 2 || (s && match)) {
751         env->cc_op = 0;
752     } else if (match) {
753         env->cc_op = 1;
754     } else {
755         env->cc_op = 3;
756     }
757 }
758 
759 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
760                            uint32_t desc)
761 {
762     const float128 a = s390_vec_read_float128(v2);
763     uint16_t i3 = extract32(simd_data(desc), 4, 12);
764 
765     if (float128_dcmask(env, a) & i3) {
766         env->cc_op = 0;
767         s390_vec_write_element64(v1, 0, -1ull);
768         s390_vec_write_element64(v1, 1, -1ull);
769     } else {
770         env->cc_op = 3;
771         s390_vec_write_element64(v1, 0, 0);
772         s390_vec_write_element64(v1, 1, 0);
773     }
774 }
775 
/*
 * Flavour of a min/max operation. vfmin_res() and vfmax_res() use it to pick
 * the special-case handling for NaN and zero operands; both assert that the
 * type lies in the range JAVA..F, i.e. IEEE is not handled by them.
 */
typedef enum S390MinMaxType {
    S390_MINMAX_TYPE_IEEE    = 0,
    S390_MINMAX_TYPE_JAVA    = 1,
    S390_MINMAX_TYPE_C_MACRO = 2,
    S390_MINMAX_TYPE_CPP     = 3,
    S390_MINMAX_TYPE_F       = 4,
} S390MinMaxType;
783 
/*
 * Decision returned by vfmin_res() and vfmax_res().
 *
 * MINMAX is returned when no special case (NaN operand, or both operands
 * zero) applies. The other values name operand A or B; presumably the
 * SILENCE_* variants ask for the silenced form of that operand -- confirm
 * against the consumer of this enum.
 */
typedef enum S390MinMaxRes {
    S390_MINMAX_RES_MINMAX    = 0,
    S390_MINMAX_RES_A         = 1,
    S390_MINMAX_RES_B         = 2,
    S390_MINMAX_RES_SILENCE_A = 3,
    S390_MINMAX_RES_SILENCE_B = 4,
} S390MinMaxRes;
791 
792 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
793                                S390MinMaxType type, float_status *s)
794 {
795     const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
796     const bool nan_a = dcmask_a & DCMASK_NAN;
797     const bool nan_b = dcmask_b & DCMASK_NAN;
798 
799     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
800 
801     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
802         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
803         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
804 
805         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
806             s->float_exception_flags |= float_flag_invalid;
807         }
808         switch (type) {
809         case S390_MINMAX_TYPE_JAVA:
810             if (sig_a) {
811                 return S390_MINMAX_RES_SILENCE_A;
812             } else if (sig_b) {
813                 return S390_MINMAX_RES_SILENCE_B;
814             }
815             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
816         case S390_MINMAX_TYPE_F:
817             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
818         case S390_MINMAX_TYPE_C_MACRO:
819             s->float_exception_flags |= float_flag_invalid;
820             return S390_MINMAX_RES_B;
821         case S390_MINMAX_TYPE_CPP:
822             s->float_exception_flags |= float_flag_invalid;
823             return S390_MINMAX_RES_A;
824         default:
825             g_assert_not_reached();
826         }
827     } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
828         switch (type) {
829         case S390_MINMAX_TYPE_JAVA:
830             return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
831         case S390_MINMAX_TYPE_C_MACRO:
832             return S390_MINMAX_RES_B;
833         case S390_MINMAX_TYPE_F:
834             return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
835         case S390_MINMAX_TYPE_CPP:
836             return S390_MINMAX_RES_A;
837         default:
838             g_assert_not_reached();
839         }
840     }
841     return S390_MINMAX_RES_MINMAX;
842 }
843 
844 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
845                                S390MinMaxType type, float_status *s)
846 {
847     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
848 
849     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
850         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
851         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
852         const bool nan_a = dcmask_a & DCMASK_NAN;
853         const bool nan_b = dcmask_b & DCMASK_NAN;
854 
855         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
856             s->float_exception_flags |= float_flag_invalid;
857         }
858         switch (type) {
859         case S390_MINMAX_TYPE_JAVA:
860             if (sig_a) {
861                 return S390_MINMAX_RES_SILENCE_A;
862             } else if (sig_b) {
863                 return S390_MINMAX_RES_SILENCE_B;
864             }
865             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
866         case S390_MINMAX_TYPE_F:
867             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
868         case S390_MINMAX_TYPE_C_MACRO:
869             s->float_exception_flags |= float_flag_invalid;
870             return S390_MINMAX_RES_B;
871         case S390_MINMAX_TYPE_CPP:
872             s->float_exception_flags |= float_flag_invalid;
873             return S390_MINMAX_RES_A;
874         default:
875             g_assert_not_reached();
876         }
877     } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
878         const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
879 
880         switch (type) {
881         case S390_MINMAX_TYPE_JAVA:
882         case S390_MINMAX_TYPE_F:
883             return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
884         case S390_MINMAX_TYPE_C_MACRO:
885             return S390_MINMAX_RES_B;
886         case S390_MINMAX_TYPE_CPP:
887             return S390_MINMAX_RES_A;
888         default:
889             g_assert_not_reached();
890         }
891     }
892     return S390_MINMAX_RES_MINMAX;
893 }
894 
895 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
896                                   S390MinMaxType type, bool is_min,
897                                   float_status *s)
898 {
899     return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
900                     vfmax_res(dcmask_a, dcmask_b, type, s);
901 }
902 
903 static void vfminmax32(S390Vector *v1, const S390Vector *v2,
904                        const S390Vector *v3, CPUS390XState *env,
905                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
906                        uintptr_t retaddr)
907 {
908     float_status *s = &env->fpu_status;
909     uint8_t vxc, vec_exc = 0;
910     S390Vector tmp = {};
911     int i;
912 
913     for (i = 0; i < 4; i++) {
914         float32 a = s390_vec_read_float32(v2, i);
915         float32 b = s390_vec_read_float32(v3, i);
916         float32 result;
917 
918         if (type != S390_MINMAX_TYPE_IEEE) {
919             S390MinMaxRes res;
920 
921             if (is_abs) {
922                 a = float32_abs(a);
923                 b = float32_abs(b);
924             }
925 
926             res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b),
927                                type, is_min, s);
928             switch (res) {
929             case S390_MINMAX_RES_MINMAX:
930                 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s);
931                 break;
932             case S390_MINMAX_RES_A:
933                 result = a;
934                 break;
935             case S390_MINMAX_RES_B:
936                 result = b;
937                 break;
938             case S390_MINMAX_RES_SILENCE_A:
939                 result = float32_silence_nan(a, s);
940                 break;
941             case S390_MINMAX_RES_SILENCE_B:
942                 result = float32_silence_nan(b, s);
943                 break;
944             default:
945                 g_assert_not_reached();
946             }
947         } else if (!is_abs) {
948             result = is_min ? float32_minnum(a, b, &env->fpu_status) :
949                               float32_maxnum(a, b, &env->fpu_status);
950         } else {
951             result = is_min ? float32_minnummag(a, b, &env->fpu_status) :
952                               float32_maxnummag(a, b, &env->fpu_status);
953         }
954 
955         s390_vec_write_float32(&tmp, i, result);
956         vxc = check_ieee_exc(env, i, false, &vec_exc);
957         if (se || vxc) {
958             break;
959         }
960     }
961     handle_ieee_exc(env, vxc, vec_exc, retaddr);
962     *v1 = tmp;
963 }
964 
965 static void vfminmax64(S390Vector *v1, const S390Vector *v2,
966                        const S390Vector *v3, CPUS390XState *env,
967                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
968                        uintptr_t retaddr)
969 {
970     float_status *s = &env->fpu_status;
971     uint8_t vxc, vec_exc = 0;
972     S390Vector tmp = {};
973     int i;
974 
975     for (i = 0; i < 2; i++) {
976         float64 a = s390_vec_read_float64(v2, i);
977         float64 b = s390_vec_read_float64(v3, i);
978         float64 result;
979 
980         if (type != S390_MINMAX_TYPE_IEEE) {
981             S390MinMaxRes res;
982 
983             if (is_abs) {
984                 a = float64_abs(a);
985                 b = float64_abs(b);
986             }
987 
988             res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b),
989                                type, is_min, s);
990             switch (res) {
991             case S390_MINMAX_RES_MINMAX:
992                 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s);
993                 break;
994             case S390_MINMAX_RES_A:
995                 result = a;
996                 break;
997             case S390_MINMAX_RES_B:
998                 result = b;
999                 break;
1000             case S390_MINMAX_RES_SILENCE_A:
1001                 result = float64_silence_nan(a, s);
1002                 break;
1003             case S390_MINMAX_RES_SILENCE_B:
1004                 result = float64_silence_nan(b, s);
1005                 break;
1006             default:
1007                 g_assert_not_reached();
1008             }
1009         } else if (!is_abs) {
1010             result = is_min ? float64_minnum(a, b, &env->fpu_status) :
1011                               float64_maxnum(a, b, &env->fpu_status);
1012         } else {
1013             result = is_min ? float64_minnummag(a, b, &env->fpu_status) :
1014                               float64_maxnummag(a, b, &env->fpu_status);
1015         }
1016 
1017         s390_vec_write_float64(&tmp, i, result);
1018         vxc = check_ieee_exc(env, i, false, &vec_exc);
1019         if (se || vxc) {
1020             break;
1021         }
1022     }
1023     handle_ieee_exc(env, vxc, vec_exc, retaddr);
1024     *v1 = tmp;
1025 }
1026 
1027 static void vfminmax128(S390Vector *v1, const S390Vector *v2,
1028                         const S390Vector *v3, CPUS390XState *env,
1029                         S390MinMaxType type, bool is_min, bool is_abs, bool se,
1030                         uintptr_t retaddr)
1031 {
1032     float128 a = s390_vec_read_float128(v2);
1033     float128 b = s390_vec_read_float128(v3);
1034     float_status *s = &env->fpu_status;
1035     uint8_t vxc, vec_exc = 0;
1036     float128 result;
1037 
1038     if (type != S390_MINMAX_TYPE_IEEE) {
1039         S390MinMaxRes res;
1040 
1041         if (is_abs) {
1042             a = float128_abs(a);
1043             b = float128_abs(b);
1044         }
1045 
1046         res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b),
1047                            type, is_min, s);
1048         switch (res) {
1049         case S390_MINMAX_RES_MINMAX:
1050             result = is_min ? float128_min(a, b, s) : float128_max(a, b, s);
1051             break;
1052         case S390_MINMAX_RES_A:
1053             result = a;
1054             break;
1055         case S390_MINMAX_RES_B:
1056             result = b;
1057             break;
1058         case S390_MINMAX_RES_SILENCE_A:
1059             result = float128_silence_nan(a, s);
1060             break;
1061         case S390_MINMAX_RES_SILENCE_B:
1062             result = float128_silence_nan(b, s);
1063             break;
1064         default:
1065             g_assert_not_reached();
1066         }
1067     } else if (!is_abs) {
1068         result = is_min ? float128_minnum(a, b, &env->fpu_status) :
1069                           float128_maxnum(a, b, &env->fpu_status);
1070     } else {
1071         result = is_min ? float128_minnummag(a, b, &env->fpu_status) :
1072                           float128_maxnummag(a, b, &env->fpu_status);
1073     }
1074 
1075     vxc = check_ieee_exc(env, 0, false, &vec_exc);
1076     handle_ieee_exc(env, vxc, vec_exc, retaddr);
1077     s390_vec_write_float128(v1, result);
1078 }
1079 
1080 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
1081 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
1082                                CPUS390XState *env, uint32_t desc)              \
1083 {                                                                              \
1084     const bool se = extract32(simd_data(desc), 3, 1);                          \
1085     uint8_t type = extract32(simd_data(desc), 4, 4);                           \
1086     bool is_abs = false;                                                       \
1087                                                                                \
1088     if (type >= 8) {                                                           \
1089         is_abs = true;                                                         \
1090         type -= 8;                                                             \
1091     }                                                                          \
1092                                                                                \
1093     vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
1094 }
1095 
1096 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN)                                        \
1097     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32)                                      \
1098     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64)                                      \
1099     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1100 
1101 DEF_GVEC_VFMINMAX(vfmax, false)
1102 DEF_GVEC_VFMINMAX(vfmin, true)
1103