1 /* { dg-do run } */
2 /* { dg-require-effective-target xop } */
3 /* { dg-options "-O2 -mxop" } */
4
5 #include "xop-check.h"
6
7 #include <x86intrin.h>
8 #include <string.h>
9
10 #define NUM 10
11
/* Shared test storage: NUM 128-bit vectors that can also be addressed as
   arrays of narrower unsigned elements covering the same bytes.
   src1 is filled by the init_* helpers, dst receives the intrinsic
   results, and res receives the scalar reference results computed by
   the check_* helpers.  */
union
{
  __m128i x[NUM];			/* Whole vectors.  */
  unsigned char ssi[NUM * 16];		/* 16 elements per vector.  */
  unsigned short si[NUM * 8];		/* 8 elements per vector.  */
  unsigned int li[NUM * 4];		/* 4 elements per vector.  */
  unsigned long long lli[NUM * 2];	/* 2 elements per vector.  */
} dst, res, src1;
20
21 static void
init_byte()22 init_byte ()
23 {
24 int i;
25 for (i=0; i < NUM * 16; i++)
26 src1.ssi[i] = i;
27 }
28
29 static void
init_word()30 init_word ()
31 {
32 int i;
33 for (i=0; i < NUM * 8; i++)
34 src1.si[i] = i;
35 }
36
37 static void
init_dword()38 init_dword ()
39 {
40 int i;
41 for (i=0; i < NUM * 4; i++)
42 src1.li[i] = i;
43 }
44
45 static int
check_byte2word()46 check_byte2word ()
47 {
48 int i, j, s, t, check_fails = 0;
49 for (i = 0; i < NUM * 16; i = i + 16)
50 {
51 for (j = 0; j < 8; j++)
52 {
53 t = i + (2 * j);
54 s = (i / 2) + j;
55 res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ;
56 if (res.si[s] != dst.si[s])
57 check_fails++;
58 }
59 }
60 return check_fails;
61 }
62
63 static int
check_byte2dword()64 check_byte2dword ()
65 {
66 int i, j, s, t, check_fails = 0;
67 for (i = 0; i < NUM * 16; i = i + 16)
68 {
69 for (j = 0; j < 4; j++)
70 {
71 t = i + (4 * j);
72 s = (i / 4) + j;
73 res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
74 + src1.ssi[t + 3]);
75 if (res.li[s] != dst.li[s])
76 check_fails++;
77 }
78 }
79 return check_fails;
80 }
81
82 static int
check_byte2qword()83 check_byte2qword ()
84 {
85 int i, j, s, t, check_fails = 0;
86 for (i = 0; i < NUM * 16; i = i + 16)
87 {
88 for (j = 0; j < 2; j++)
89 {
90 t = i + (8 * j);
91 s = (i / 8) + j;
92 res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
93 + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
94 + (src1.ssi[t + 6] + src1.ssi[t + 7]));
95 if (res.lli[s] != dst.lli[s])
96 check_fails++;
97 }
98 }
99 return check_fails;
100 }
101
102 static int
check_word2dword()103 check_word2dword ()
104 {
105 int i, j, s, t, check_fails = 0;
106 for (i = 0; i < NUM * 8; i = i + 8)
107 {
108 for (j = 0; j < 4; j++)
109 {
110 t = i + (2 * j);
111 s = (i / 2) + j;
112 res.li[s] = src1.si[t] + src1.si[t + 1] ;
113 if (res.li[s] != dst.li[s])
114 check_fails++;
115 }
116 }
117 return check_fails;
118 }
119
120 static int
check_word2qword()121 check_word2qword ()
122 {
123 int i, j, s, t, check_fails = 0;
124 for (i = 0; i < NUM * 8; i = i + 8)
125 {
126 for (j = 0; j < 2; j++)
127 {
128 t = i + (4 * j);
129 s = (i / 4) + j;
130 res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
131 + src1.si[t + 3]);
132 if (res.lli[s] != dst.lli[s])
133 check_fails++;
134 }
135 }
136 return check_fails;
137 }
138
139 static int
check_dword2qword()140 check_dword2qword ()
141 {
142 int i, j, s, t, check_fails = 0;
143 for (i = 0; i < NUM * 4; i = i + 4)
144 {
145 for (j = 0; j < 2; j++)
146 {
147 t = i + (2 * j);
148 s = (i / 2) + j;
149 res.lli[s] = src1.li[t] + src1.li[t + 1] ;
150 if (res.lli[s] != dst.lli[s])
151 check_fails++;
152 }
153 }
154 return check_fails;
155 }
156
157 static void
xop_test(void)158 xop_test (void)
159 {
160 int i;
161
162 /* Check haddubw */
163 init_byte ();
164
165 for (i = 0; i < NUM; i++)
166 dst.x[i] = _mm_haddw_epu8 (src1.x[i]);
167
168 if (check_byte2word())
169 abort ();
170
171 /* Check haddubd */
172 for (i = 0; i < NUM; i++)
173 dst.x[i] = _mm_haddd_epu8 (src1.x[i]);
174
175 if (check_byte2dword())
176 abort ();
177
178 /* Check haddubq */
179 for (i = 0; i < NUM; i++)
180 dst.x[i] = _mm_haddq_epu8 (src1.x[i]);
181
182 if (check_byte2qword())
183 abort ();
184
185 /* Check hadduwd */
186 init_word ();
187
188 for (i = 0; i < NUM; i++)
189 dst.x[i] = _mm_haddd_epu16 (src1.x[i]);
190
191 if (check_word2dword())
192 abort ();
193
194 /* Check haddbuwq */
195 for (i = 0; i < NUM; i++)
196 dst.x[i] = _mm_haddq_epu16 (src1.x[i]);
197
198 if (check_word2qword())
199 abort ();
200
201 /* Check hadudq */
202 init_dword ();
203
204 for (i = 0; i < NUM; i++)
205 dst.x[i] = _mm_haddq_epu32 (src1.x[i]);
206
207 if (check_dword2qword())
208 abort ();
209 }
210