1 /* { dg-do run } */
2 /* { dg-options "-O2 -mavx5124vnniw" } */
3 /* { dg-require-effective-target avx5124vnniw } */
4 
5 #define DEFAULT_VALUE 0x7ffffffe
6 
7 #define AVX5124VNNIW
8 #include "avx512f-helper.h"
9 
10 #define SIZE (AVX512F_LEN / 32)
11 
12 #include "avx512f-mask-type.h"
13 
14 void
CALC(short * src1,short * src2,short * src3,short * src4,int * prev_dst,short * mult,int * dst)15 CALC (short *src1, short* src2, short *src3,
16       short *src4, int* prev_dst, short *mult, int *dst)
17 {
18   int i;
19 
20   for (i = 0; i < SIZE; i++)
21     {
22       int p1dword, p2dword;
23       long long int tmp;
24       dst[i] = prev_dst[i];
25       p1dword = (int)(src1[2*i  ]) * (int)(mult[0]);
26       p2dword = (int)(src1[2*i+1]) * (int)(mult[1]);
27       tmp = (long long)dst[i] + p1dword + p2dword;
28       if (tmp > 0x7fffffff)
29 	dst[i] = 0x7fffffff;
30       else
31 	dst[i] += p1dword + p2dword;
32 
33       p1dword = (int)(src2[2*i  ]) * (int)(mult[2]);
34       p2dword = (int)(src2[2*i+1]) * (int)(mult[3]);
35       tmp = (long long)dst[i] + p1dword + p2dword;
36       if (tmp > 0x7fffffff)
37 	dst[i] = 0x7fffffff;
38       else
39 	dst[i] += p1dword + p2dword;
40 
41       p1dword = (int)(src3[2*i  ]) * (int)(mult[4]);
42       p2dword = (int)(src3[2*i+1]) * (int)(mult[5]);
43       tmp = (long long)dst[i] + p1dword + p2dword;
44       if (tmp > 0x7fffffff)
45 	dst[i] = 0x7fffffff;
46       else
47 	dst[i] += p1dword + p2dword;
48 
49       p1dword = (int)(src4[2*i  ]) * (int)(mult[6]);
50       p2dword = (int)(src4[2*i+1]) * (int)(mult[7]);
51       tmp = (long long)dst[i] + p1dword + p2dword;
52       if (tmp > 0x7fffffff)
53 	dst[i] = 0x7fffffff;
54       else
55 	dst[i] += p1dword + p2dword;
56     }
57 }
58 
59 void
TEST(void)60 TEST (void)
61 {
62   int i;
63   UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4;
64   UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3;
65   UNION_TYPE (128, i_w) mult;
66   MASK_TYPE mask = MASK_VALUE;
67   int res_ref[SIZE];
68 
69   for (i = 0; i < SIZE * 2; i++)
70     {
71       src1.a[i] = 2 + 7 * i % 291;
72       src2.a[i] = 3 + 11 * (i % 377) * i;
73       src3.a[i] = src1.a[i] * src1.a[i];
74       src4.a[i] = src2.a[i] * src2.a[i];
75     }
76   for (i = 0; i < 8; i++)
77     mult.a[i] = 3 + i * 2;
78 
79   for (i = 0; i < SIZE; i++)
80     src5.a[i] = DEFAULT_VALUE;
81 
82   CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
83 
84   res1.x = INTRINSIC (_4dpwssds_epi32)	     (      src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
85   res2.x = INTRINSIC (_mask_4dpwssds_epi32)  (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
86   res3.x = INTRINSIC (_maskz_4dpwssds_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
87 
88   if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
89     abort ();
90 
91   MASK_MERGE (i_d) (res_ref, mask, SIZE);
92   if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
93     abort ();
94 
95   MASK_ZERO (i_d) (res_ref, mask, SIZE);
96   if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
97     abort ();
98 }
99