/* { dg-do run } */
/* { dg-require-effective-target ssse3 } */
/* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
4 
/* Header named by the "#include CHECK_H" that follows.  The #ifndef guard
   lets it be defined beforehand to substitute a different header.  */
#ifndef CHECK_H
#define CHECK_H "ssse3-check.h"
#endif

/* Name under which the test routine at the end of this file is defined.
   It can likewise be defined beforehand to choose another name.  */
#ifndef TEST
#define TEST ssse3_test
#endif
12 
13 #include CHECK_H
14 
15 #include "ssse3-vals.h"
16 
17 #include <tmmintrin.h>
18 #include <string.h>
19 
/* Test the 64-bit form.

   Loads 8 bytes from I1 and 8 bytes from I2 as __m64 values, calls
   _mm_alignr_pi8 with I1's value as the first argument and I2's value as
   the second, and stores the 8-byte result through R.  IMM selects the
   byte shift count.

   The shift count of _mm_alignr_pi8 has to be a compile-time constant,
   so every tested value gets its own case.  Any IMM above 15 is run with
   a count of 16; compute_correct_result_64 expects an all-zero result
   for those.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  /* The int * -> __m64 * casts rely on -fno-strict-aliasing, which the
     dg-options line of this file passes.  */
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  switch (imm)
    {
    case 0:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
      break;
    case 1:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
      break;
    case 2:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
      break;
    case 3:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
      break;
    case 4:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
      break;
    case 5:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
      break;
    case 6:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
      break;
    case 7:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
      break;
    case 8:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
      break;
    case 9:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
      break;
    case 10:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
      break;
    case 11:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
      break;
    case 12:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
      break;
    case 13:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
      break;
    case 14:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
      break;
    case 15:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
      break;
    default:
      /* Every IMM >= 16 is represented by the single count 16.  */
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    }

  /* Empty the MMX state after using __m64 operations.  */
  _mm_empty ();
}
84 
/* Test the 128-bit form.

   Loads 16 bytes from I1 and 16 bytes from I2 as __m128i values, calls
   _mm_alignr_epi8 with I1's value as the first argument and I2's value
   as the second, and stores the 16-byte result through R.  IMM selects
   the byte shift count.

   The shift count of _mm_alignr_epi8 has to be a compile-time constant,
   so every tested value gets its own case.  Any IMM above 31 is run with
   a count of 32; compute_correct_result_128 expects an all-zero result
   for those.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned.  The pointer casts
     rely on -fno-strict-aliasing from the dg-options line.  */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  switch (imm)
    {
    case 0:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
      break;
    case 1:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
      break;
    case 2:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
      break;
    case 3:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
      break;
    case 4:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
      break;
    case 5:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
      break;
    case 6:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
      break;
    case 7:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
      break;
    case 8:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
      break;
    case 9:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
      break;
    case 10:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
      break;
    case 11:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
      break;
    case 12:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
      break;
    case 13:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
      break;
    case 14:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
      break;
    case 15:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
      break;
    case 16:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
      break;
    case 17:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
      break;
    case 18:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
      break;
    case 19:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
      break;
    case 20:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
      break;
    case 21:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
      break;
    case 22:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
      break;
    case 23:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
      break;
    case 24:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
      break;
    case 25:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
      break;
    case 26:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
      break;
    case 27:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
      break;
    case 28:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
      break;
    case 29:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
      break;
    case 30:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
      break;
    case 31:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
      break;
    default:
      /* Every IMM >= 32 is represented by the single count 32.  */
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    }
}
196 
/* Routine to manually compute the results of the 128-bit form.

   The 16 bytes at I2 followed by the 16 bytes at I1 form a 32-byte
   buffer.  The 16 bytes written through R are the bytes of that buffer
   starting at offset IMM; any result byte whose source offset would be
   32 or more is written as zero.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [32];
  char *bout = (char *) r;
  int i;

  /* I2 occupies offsets 0-15 of the buffer, I1 offsets 16-31.  */
  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    /* The separate IMM >= 32 test keeps a huge IMM from wrapping around
       in the unsigned sum IMM + I and indexing into the buffer.  */
    if (imm >= 32 || imm + i >= 32)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];
}
214 
/* Routine to manually compute the results of two 64-bit operations.

   I1, I2 and R each refer to 16 bytes, handled as two independent 8-byte
   halves.  For each half, the 8 bytes from I2 followed by the 8 bytes
   from I1 form a 16-byte buffer, and the 8 result bytes are the bytes of
   that buffer starting at offset IMM.  Any result byte whose source
   offset would be 16 or more is written as zero.

   The second half is addressed as element 2 of the int arrays, so this
   relies on int being 4 bytes wide.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [16];
  char *bout = (char *) r;
  int half;
  int i;

  for (half = 0; half < 2; half++)
    {
      /* I2 occupies offsets 0-7 of the buffer, I1 offsets 8-15.  */
      memcpy (&buf[0], &i2[2 * half], 8);
      memcpy (&buf[8], &i1[2 * half], 8);

      for (i = 0; i < 8; i++)
	/* The separate IMM >= 16 test keeps a huge IMM from wrapping
	   around in the unsigned sum IMM + I.  */
	if (imm >= 16 || imm + i >= 16)
	  bout[8 * half + i] = 0;
	else
	  bout[8 * half + i] = buf[imm + i];
    }
}
242 
243 static void
TEST(void)244 TEST (void)
245 {
246   int i;
247   int r [4] __attribute__ ((aligned(16)));
248   int ck [4];
249   unsigned int imm;
250   int fail = 0;
251 
252   for (i = 0; i < 256; i += 8)
253     for (imm = 0; imm < 100; imm++)
254       {
255 	/* Manually compute the result */
256 	compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
257 
258 	/* Run the 64-bit tests */
259 	ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
260 	ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
261 	fail += chk_128 (ck, r);
262 
263 	/* Recompute the results for 128-bits */
264 	compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
265 
266 	/* Run the 128-bit tests */
267 	ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
268 	fail += chk_128 (ck, r);
269       }
270 
271   if (fail != 0)
272     abort ();
273 }
274