1 /* { dg-do run { target avx512fp16 } } */
2 /* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
3
4
5 #define AVX512FP16
6 #include "avx512fp16-helper.h"
7
8 #define N_ELEMS 8
9
10 void NOINLINE
EMULATE(c_fmadd_csh)11 EMULATE(c_fmadd_csh) (V512 * dest, V512 op1, V512 op2,
12 __mmask8 k, int zero_mask, int c_flag,
13 int is_mask3)
14 {
15 V512 v1, v2, v3, v4, v5, v6, v7, v8;
16 int i;
17 int invert = 1;
18 if (c_flag == 1)
19 invert = -1;
20
21 unpack_ph_2twops(op1, &v1, &v2);
22 unpack_ph_2twops(op2, &v3, &v4);
23 unpack_ph_2twops(*dest, &v7, &v8);
24
25 if ((k&1) || !k) {
26 v5.f32[0] = v1.f32[0] * v7.f32[0]
27 - invert * (v1.f32[1] * v7.f32[1]) + v3.f32[0];
28 v5.f32[1] = v1.f32[0] * v7.f32[1]
29 + invert * (v1.f32[1] * v7.f32[0]) + v3.f32[1];
30 }
31 else if (zero_mask)
32 v5.f32[0] = 0;
33 else
34 v5.f32[0] = v7.f32[0];
35
36 for (i = 2; i < 8; i++)
37 v5.f32[i] = is_mask3? v3.f32[i] : v7.f32[i];
38
39 *dest = pack_twops_2ph(v5, v6);
40 }
41
42 void
TEST(void)43 TEST (void)
44 {
45 V512 res;
46 V512 exp;
47
48 init_src();
49
50 init_dest(&res, &exp);
51 EMULATE(c_fmadd_csh)(&exp, src1, src2, 0x1, 0, 0, 0);
52 res.xmmh[0] = _mm_fmadd_round_sch(res.xmmh[0], src1.xmmh[0],
53 src2.xmmh[0], _ROUND_NINT);
54 CHECK_RESULT (&res, &exp, N_ELEMS, _mm_fmadd_sch);
55
56 init_dest(&res, &exp);
57 EMULATE(c_fmadd_csh)(&exp, src1, src2, 0x1, 0, 0, 0);
58 res.xmmh[0] = _mm_mask_fmadd_round_sch(res.xmmh[0], 0x1, src1.xmmh[0],
59 src2.xmmh[0], _ROUND_NINT);
60 CHECK_RESULT (&res, &exp, N_ELEMS, _mm_mask_fmadd_sch);
61 init_dest(&res, &exp);
62 EMULATE(c_fmadd_csh)(&exp, src1, src2, 0x1, 0, 0, 1);
63 res.xmmh[0] = _mm_mask3_fmadd_round_sch(res.xmmh[0], src1.xmmh[0], src2.xmmh[0],
64 0x1, _ROUND_NINT);
65 CHECK_RESULT (&res, &exp, N_ELEMS, _mm_mask3_fmadd_sch);
66
67 init_dest(&res, &exp);
68 EMULATE(c_fmadd_csh)(&exp, src1, src2, 0x3, 1, 0, 0);
69 res.xmmh[0] = _mm_maskz_fmadd_round_sch(0x3, res.xmmh[0], src1.xmmh[0],
70 src2.xmmh[0], _ROUND_NINT);
71 CHECK_RESULT (&res, &exp, N_ELEMS, _mm_maskz_fmadd_sch);
72
73 if (n_errs != 0) {
74 abort ();
75 }
76 }
77
78