/* { dg-do run } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-require-effective-target arm_neon_hw } */
/* { dg-require-effective-target arm_little_endian } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
7 
8 #include <arm_neon.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11 
/* Two-operand shuffle on eight 8-bit lanes.  Indices 0-7 pick from __a and
   8-15 pick from __b, so the result is lanes 2-7 of __a followed by
   lanes 0-1 of __b.  */
uint8x8_t
tst_vext_u8 (uint8x8_t __a, uint8x8_t __b)
{
  const uint8x8_t lane_sel = {2, 3, 4, 5, 6, 7, 8, 9};

  return __builtin_shuffle (__a, __b, lane_sel);
}
19 
/* One-operand shuffle on eight 8-bit lanes: rotates __a so that lanes 2-7
   come first and lanes 0-1 wrap around to the end.  */
uint8x8_t
tst_vext_u8_rotate (uint8x8_t __a)
{
  const uint8x8_t lane_sel = {2, 3, 4, 5, 6, 7, 0, 1};

  return __builtin_shuffle (__a, lane_sel);
}
26 
/* Two-operand shuffle on four 16-bit lanes.  Indices 0-3 pick from __a and
   4-7 pick from __b, so the result is lanes 2-3 of __a followed by
   lanes 0-1 of __b.  */
uint16x4_t
tst_vext_u16 (uint16x4_t __a, uint16x4_t __b)
{
  const uint16x4_t lane_sel = {2, 3, 4, 5};

  return __builtin_shuffle (__a, __b, lane_sel);
}
33 
/* One-operand shuffle on four 16-bit lanes: rotates __a by two lanes, giving
   lanes 2-3 followed by lanes 0-1.  */
uint16x4_t
tst_vext_u16_rotate (uint16x4_t __a)
{
  const uint16x4_t lane_sel = {2, 3, 0, 1};

  return __builtin_shuffle (__a, lane_sel);
}
40 
/* Two-operand shuffle on two 32-bit lanes.  Index 1 is the upper lane of __a
   and index 2 is the lower lane of __b, so the result is {__a[1], __b[0]}.  */
uint32x2_t
tst_vext_u32 (uint32x2_t __a, uint32x2_t __b)
{
  const uint32x2_t lane_sel = {1, 2};

  return __builtin_shuffle (__a, __b, lane_sel);
}
47 
/* One-operand shuffle on two 32-bit lanes: swaps the two lanes of __a.
   This one is mapped into vrev64.32.  */
uint32x2_t
tst_vext_u32_rotate (uint32x2_t __a)
{
  const uint32x2_t lane_sel = {1, 0};

  return __builtin_shuffle (__a, lane_sel);
}
55 
/* Two-operand shuffle on sixteen 8-bit lanes.  Indices 0-15 pick from __a
   and 16-31 pick from __b, so the result is lanes 4-15 of __a followed by
   lanes 0-3 of __b.  */
uint8x16_t
tst_vextq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  const uint8x16_t lane_sel = {4, 5, 6, 7, 8, 9, 10, 11,
                               12, 13, 14, 15, 16, 17, 18, 19};

  return __builtin_shuffle (__a, __b, lane_sel);
}
63 
/* One-operand shuffle on sixteen 8-bit lanes: rotates __a by four lanes,
   giving lanes 4-15 followed by lanes 0-3.  */
uint8x16_t
tst_vextq_u8_rotate (uint8x16_t __a)
{
  const uint8x16_t lane_sel = {4, 5, 6, 7, 8, 9, 10, 11,
                               12, 13, 14, 15, 0, 1, 2, 3};

  return __builtin_shuffle (__a, lane_sel);
}
71 
/* Two-operand shuffle on eight 16-bit lanes.  Indices 0-7 pick from __a and
   8-15 pick from __b, so the result is lanes 2-7 of __a followed by
   lanes 0-1 of __b.  */
uint16x8_t
tst_vextq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  const uint16x8_t lane_sel = {2, 3, 4, 5, 6, 7, 8, 9};

  return __builtin_shuffle (__a, __b, lane_sel);
}
78 
/* One-operand shuffle on eight 16-bit lanes: rotates __a by two lanes,
   giving lanes 2-7 followed by lanes 0-1.  */
uint16x8_t
tst_vextq_u16_rotate (uint16x8_t __a)
{
  const uint16x8_t lane_sel = {2, 3, 4, 5, 6, 7, 0, 1};

  return __builtin_shuffle (__a, lane_sel);
}
85 
/* Two-operand shuffle on four 32-bit lanes.  Indices 0-3 pick from __a and
   4-7 pick from __b, so the result is lanes 1-3 of __a followed by
   lane 0 of __b.  */
uint32x4_t
tst_vextq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  const uint32x4_t lane_sel = {1, 2, 3, 4};

  return __builtin_shuffle (__a, __b, lane_sel);
}
92 
/* One-operand shuffle on four 32-bit lanes: rotates __a by one lane, giving
   lanes 1-3 followed by lane 0.  */
uint32x4_t
tst_vextq_u32_rotate (uint32x4_t __a)
{
  const uint32x4_t lane_sel = {1, 2, 3, 0};

  return __builtin_shuffle (__a, lane_sel);
}
99 
/* Two-operand shuffle on two 64-bit lanes.  Index 1 is the upper lane of __a
   and index 2 is the lower lane of __b, so the result is {__a[1], __b[0]}.  */
uint64x2_t
tst_vextq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  const uint64x2_t lane_sel = {1, 2};

  return __builtin_shuffle (__a, __b, lane_sel);
}
106 
/* One-operand shuffle on two 64-bit lanes: swaps the two lanes of __a.  */
uint64x2_t
tst_vextq_u64_rotate (uint64x2_t __a)
{
  const uint64x2_t lane_sel = {1, 0};

  return __builtin_shuffle (__a, lane_sel);
}
113 
/* Compare the first N 8-bit lanes of ACTUAL against EXPECTED.  On the first
   mismatch print "NAME[lane]=got expected want" and abort.  */
static void
check_u8 (const char *name, const uint8_t *actual, const uint8_t *expected,
          int n)
{
  int i;

  for (i = 0; i < n; i++)
    if (actual[i] != expected[i])
      {
        printf ("%s[%d]=%u expected %u\n", name, i,
                (unsigned int) actual[i], (unsigned int) expected[i]);
        abort ();
      }
}

/* Same as check_u8, for 16-bit lanes.  */
static void
check_u16 (const char *name, const uint16_t *actual,
           const uint16_t *expected, int n)
{
  int i;

  for (i = 0; i < n; i++)
    if (actual[i] != expected[i])
      {
        printf ("%s[%d]=%u expected %u\n", name, i,
                (unsigned int) actual[i], (unsigned int) expected[i]);
        abort ();
      }
}

/* Same as check_u8, for 32-bit lanes.  The values are widened to
   unsigned long so the conversion matches whatever type uint32_t is
   on the target.  */
static void
check_u32 (const char *name, const uint32_t *actual,
           const uint32_t *expected, int n)
{
  int i;

  for (i = 0; i < n; i++)
    if (actual[i] != expected[i])
      {
        printf ("%s[%d]=%lu expected %lu\n", name, i,
                (unsigned long) actual[i], (unsigned long) expected[i]);
        abort ();
      }
}

/* Same as check_u8, for 64-bit lanes.  The values are unsigned, so they are
   printed with %llu rather than a signed conversion.  */
static void
check_u64 (const char *name, const uint64_t *actual,
           const uint64_t *expected, int n)
{
  int i;

  for (i = 0; i < n; i++)
    if (actual[i] != expected[i])
      {
        printf ("%s[%d]=%llu expected %llu\n", name, i,
                (unsigned long long) actual[i],
                (unsigned long long) expected[i]);
        abort ();
      }
}

/* Run every shuffle helper on known input vectors, store the result back to
   memory and compare it lane by lane with the expected pattern.  Returns 0
   when all checks pass; the first mismatch is reported and aborts.  */
int
main (void)
{
  /* Input data: the first operand counts up from 0, the second operand
     continues where the first one stops.  */
  uint8_t arr_u8x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint8_t arr2_u8x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
  uint16_t arr_u16x4[] = {0, 1, 2, 3};
  uint16_t arr2_u16x4[] = {4, 5, 6, 7};
  uint32_t arr_u32x2[] = {0, 1};
  uint32_t arr2_u32x2[] = {2, 3};
  uint8_t arr_u8x16[] = {0, 1, 2, 3, 4, 5, 6, 7,
                         8, 9, 10, 11, 12, 13, 14, 15};
  uint8_t arr2_u8x16[] = {16, 17, 18, 19, 20, 21, 22, 23,
                          24, 25, 26, 27, 28, 29, 30, 31};
  uint16_t arr_u16x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint16_t arr2_u16x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
  uint32_t arr_u32x4[] = {0, 1, 2, 3};
  uint32_t arr2_u32x4[] = {4, 5, 6, 7};
  uint64_t arr_u64x2[] = {0, 1};
  uint64_t arr2_u64x2[] = {2, 3};

  /* Expected lanes for the two-operand and the rotate form of each type.  */
  uint8_t expected_u8x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
  uint8_t expected_rot_u8x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
  uint16_t expected_u16x4[] = {2, 3, 4, 5};
  uint16_t expected_rot_u16x4[] = {2, 3, 0, 1};
  uint32_t expected_u32x2[] = {1, 2};
  uint32_t expected_rot_u32x2[] = {1, 0};
  uint8_t expected_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
                              12, 13, 14, 15, 16, 17, 18, 19};
  uint8_t expected_rot_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
                                  12, 13, 14, 15, 0, 1, 2, 3};
  uint16_t expected_u16x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
  uint16_t expected_rot_u16x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
  uint32_t expected_u32x4[] = {1, 2, 3, 4};
  uint32_t expected_rot_u32x4[] = {1, 2, 3, 0};
  uint64_t expected_u64x2[] = {1, 2};
  uint64_t expected_rot_u64x2[] = {1, 0};

  uint8x8_t vec_u8x8 = vld1_u8 (arr_u8x8);
  uint8x8_t vec2_u8x8 = vld1_u8 (arr2_u8x8);
  uint16x4_t vec_u16x4 = vld1_u16 (arr_u16x4);
  uint16x4_t vec2_u16x4 = vld1_u16 (arr2_u16x4);
  uint32x2_t vec_u32x2 = vld1_u32 (arr_u32x2);
  uint32x2_t vec2_u32x2 = vld1_u32 (arr2_u32x2);
  uint8x16_t vec_u8x16 = vld1q_u8 (arr_u8x16);
  uint8x16_t vec2_u8x16 = vld1q_u8 (arr2_u8x16);
  uint16x8_t vec_u16x8 = vld1q_u16 (arr_u16x8);
  uint16x8_t vec2_u16x8 = vld1q_u16 (arr2_u16x8);
  uint32x4_t vec_u32x4 = vld1q_u32 (arr_u32x4);
  uint32x4_t vec2_u32x4 = vld1q_u32 (arr2_u32x4);
  uint64x2_t vec_u64x2 = vld1q_u64 (arr_u64x2);
  uint64x2_t vec2_u64x2 = vld1q_u64 (arr2_u64x2);

  /* Scratch buffers the result vectors are stored into before checking.  */
  uint8_t out_u8x8[8];
  uint16_t out_u16x4[4];
  uint32_t out_u32x2[2];
  uint8_t out_u8x16[16];
  uint16_t out_u16x8[8];
  uint32_t out_u32x4[4];
  uint64_t out_u64x2[2];

  /* 64-bit vectors.  */
  vst1_u8 (out_u8x8, tst_vext_u8 (vec_u8x8, vec2_u8x8));
  check_u8 ("tst_vext_u8", out_u8x8, expected_u8x8, 8);

  vst1_u8 (out_u8x8, tst_vext_u8_rotate (vec_u8x8));
  check_u8 ("tst_vext_u8_rotate", out_u8x8, expected_rot_u8x8, 8);

  vst1_u16 (out_u16x4, tst_vext_u16 (vec_u16x4, vec2_u16x4));
  check_u16 ("tst_vext_u16", out_u16x4, expected_u16x4, 4);

  vst1_u16 (out_u16x4, tst_vext_u16_rotate (vec_u16x4));
  check_u16 ("tst_vext_u16_rotate", out_u16x4, expected_rot_u16x4, 4);

  vst1_u32 (out_u32x2, tst_vext_u32 (vec_u32x2, vec2_u32x2));
  check_u32 ("tst_vext_u32", out_u32x2, expected_u32x2, 2);

  vst1_u32 (out_u32x2, tst_vext_u32_rotate (vec_u32x2));
  check_u32 ("tst_vext_u32_rotate", out_u32x2, expected_rot_u32x2, 2);

  /* 128-bit vectors.  */
  vst1q_u8 (out_u8x16, tst_vextq_u8 (vec_u8x16, vec2_u8x16));
  check_u8 ("tst_vextq_u8", out_u8x16, expected_u8x16, 16);

  vst1q_u8 (out_u8x16, tst_vextq_u8_rotate (vec_u8x16));
  check_u8 ("tst_vextq_u8_rotate", out_u8x16, expected_rot_u8x16, 16);

  vst1q_u16 (out_u16x8, tst_vextq_u16 (vec_u16x8, vec2_u16x8));
  check_u16 ("tst_vextq_u16", out_u16x8, expected_u16x8, 8);

  vst1q_u16 (out_u16x8, tst_vextq_u16_rotate (vec_u16x8));
  check_u16 ("tst_vextq_u16_rotate", out_u16x8, expected_rot_u16x8, 8);

  vst1q_u32 (out_u32x4, tst_vextq_u32 (vec_u32x4, vec2_u32x4));
  check_u32 ("tst_vextq_u32", out_u32x4, expected_u32x4, 4);

  vst1q_u32 (out_u32x4, tst_vextq_u32_rotate (vec_u32x4));
  check_u32 ("tst_vextq_u32_rotate", out_u32x4, expected_rot_u32x4, 4);

  vst1q_u64 (out_u64x2, tst_vextq_u64 (vec_u64x2, vec2_u64x2));
  check_u64 ("tst_vextq_u64", out_u64x2, expected_u64x2, 2);

  vst1q_u64 (out_u64x2, tst_vextq_u64_rotate (vec_u64x2));
  check_u64 ("tst_vextq_u64_rotate", out_u64x2, expected_rot_u64x2, 2);

  return 0;
}
342