1 /* { dg-do run } */
2 /* { dg-require-effective-target ssse3 } */
3 /* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
4
5 #ifndef CHECK_H
6 #define CHECK_H "ssse3-check.h"
7 #endif
8
9 #ifndef TEST
10 #define TEST ssse3_test
11 #endif
12
13 #include CHECK_H
14
15 #include "ssse3-vals.h"
16
17 #include <tmmintrin.h>
18 #include <string.h>
19
/* Test the 64-bit form */
/* Apply the MMX PALIGNR to the 8-byte operands *I1 (high) and *I2 (low)
   with byte-shift count IMM, storing the 8-byte result through R.
   _mm_alignr_pi8 requires a compile-time-constant immediate, so each
   legal shift count needs its own literal call; any count >= 16 shifts
   everything out and yields zero, so 16 stands in for all of them in
   the default case.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  switch (imm)
    {
    case 0:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
      break;
    case 1:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
      break;
    case 2:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
      break;
    case 3:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
      break;
    case 4:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
      break;
    case 5:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
      break;
    case 6:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
      break;
    case 7:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
      break;
    case 8:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
      break;
    case 9:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
      break;
    case 10:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
      break;
    case 11:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
      break;
    case 12:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
      break;
    case 13:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
      break;
    case 14:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
      break;
    case 15:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
      break;
    default:
      /* All counts >= 16 produce an all-zero result.  */
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    }

  /* Clear MMX state so later x87/SSE code is not corrupted.  */
  _mm_empty ();
}
84
/* Test the 128-bit form */
/* Apply the SSE PALIGNR to the 16-byte operands *I1 (high) and *I2 (low)
   with byte-shift count IMM, storing the 16-byte result through R.
   As with the 64-bit form, _mm_alignr_epi8 needs a literal immediate,
   hence one case per legal count; counts >= 32 all yield zero, so 32
   covers them in the default case.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  switch (imm)
    {
    case 0:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
      break;
    case 1:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
      break;
    case 2:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
      break;
    case 3:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
      break;
    case 4:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
      break;
    case 5:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
      break;
    case 6:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
      break;
    case 7:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
      break;
    case 8:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
      break;
    case 9:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
      break;
    case 10:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
      break;
    case 11:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
      break;
    case 12:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
      break;
    case 13:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
      break;
    case 14:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
      break;
    case 15:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
      break;
    case 16:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
      break;
    case 17:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
      break;
    case 18:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
      break;
    case 19:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
      break;
    case 20:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
      break;
    case 21:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
      break;
    case 22:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
      break;
    case 23:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
      break;
    case 24:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
      break;
    case 25:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
      break;
    case 26:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
      break;
    case 27:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
      break;
    case 28:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
      break;
    case 29:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
      break;
    case 30:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
      break;
    case 31:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
      break;
    default:
      /* All counts >= 32 produce an all-zero result.  */
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    }
}
196
/* Routine to manually compute the results */
/* Scalar reference model of the 128-bit PALIGNR: concatenate *I2 (low
   16 bytes) and *I1 (high 16 bytes), shift right by IMM bytes, and
   store the low 16 bytes of the result through R.  Shift counts of 32
   or more produce all zeros.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char concat[32];
  char *out = (char *) r;
  int k;

  /* Build the 32-byte concatenation [i2 | i1]; the second PALIGNR
     operand supplies the low-order bytes.  */
  memcpy (concat, i2, 16);
  memcpy (concat + 16, i1, 16);

  for (k = 0; k < 16; k++)
    {
      if (imm >= 32 || imm + k >= 32)
	out[k] = 0;
      else
	out[k] = concat[imm + k];
    }
}
214
/* Scalar reference model of the 64-bit PALIGNR pair: the test applies
   two independent MMX PALIGNRs, one per 8-byte lane, so for each lane
   concatenate the *I2 half (low) with the *I1 half (high), shift right
   by IMM bytes, and store the low 8 bytes through R.  Shift counts of
   16 or more produce all zeros.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char *out = (char *) r;
  int half, k;

  /* Lane 0 uses ints [0..1] of each operand, lane 1 uses ints [2..3].  */
  for (half = 0; half < 2; half++)
    {
      char concat[16];

      memcpy (concat, i2 + 2 * half, 8);
      memcpy (concat + 8, i1 + 2 * half, 8);

      for (k = 0; k < 8; k++)
	{
	  if (imm >= 16 || imm + k >= 16)
	    out[8 * half + k] = 0;
	  else
	    out[8 * half + k] = concat[imm + k];
	}
    }
}
242
243 static void
TEST(void)244 TEST (void)
245 {
246 int i;
247 int r [4] __attribute__ ((aligned(16)));
248 int ck [4];
249 unsigned int imm;
250 int fail = 0;
251
252 for (i = 0; i < 256; i += 8)
253 for (imm = 0; imm < 100; imm++)
254 {
255 /* Manually compute the result */
256 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
257
258 /* Run the 64-bit tests */
259 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
260 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
261 fail += chk_128 (ck, r);
262
263 /* Recompute the results for 128-bits */
264 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
265
266 /* Run the 128-bit tests */
267 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
268 fail += chk_128 (ck, r);
269 }
270
271 if (fail != 0)
272 abort ();
273 }
274