/* { dg-do run } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-require-effective-target arm_little_endian } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */

#include <arm_neon.h>
#include <stdlib.h>
#include <stdio.h>
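
/* Check that __builtin_shuffle with an "extract"-style mask (a window
   of consecutive lane indices taken from the concatenation of the two
   operands, or a rotation of a single operand) computes the expected
   results.  On ARM these shuffles should map to the NEON vext
   instruction (vrev for some of the rotations).  */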

uint8x8_t
tst_vext_u8 (uint8x8_t __a, uint8x8_t __b)
{
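  /* Lane indices 0-7 select from __a and 8-15 from __b, so this mask
     takes eight consecutive elements starting at index 2 of the
     concatenation {__a, __b} -- the vext.8 #2 pattern.  */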
  uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};

  return __builtin_shuffle (__a, __b, __mask1);
}

uint8x8_t
tst_vext_u8_rotate (uint8x8_t __a)
{
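  /* Single-operand form: the mask rotates __a left by two lanes,
     which vext can implement by using __a as both source operands.  */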
  uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
  return __builtin_shuffle (__a, __mask1);
}

uint16x4_t
tst_vext_u16 (uint16x4_t __a, uint16x4_t __b)
{
  uint16x4_t __mask1 = {2, 3, 4, 5};
  return __builtin_shuffle (__a, __b, __mask1);
}

uint16x4_t
tst_vext_u16_rotate (uint16x4_t __a)
{
  uint16x4_t __mask1 = {2, 3, 0, 1};
  return __builtin_shuffle (__a, __mask1);
}

uint32x2_t
tst_vext_u32 (uint32x2_t __a, uint32x2_t __b)
{
  uint32x2_t __mask1 = {1, 2};
  return __builtin_shuffle (__a, __b, __mask1);
}

/* This one is mapped into vrev64.32.  */
uint32x2_t
tst_vext_u32_rotate (uint32x2_t __a)
{
  uint32x2_t __mask1 = {1, 0};
  return __builtin_shuffle (__a, __mask1);
}

uint8x16_t
tst_vextq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
                        12, 13, 14, 15, 16, 17, 18, 19};
  return __builtin_shuffle (__a, __b, __mask1);
}

uint8x16_t
tst_vextq_u8_rotate (uint8x16_t __a)
{
  uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
                        12, 13, 14, 15, 0, 1, 2, 3};
  return __builtin_shuffle (__a, __mask1);
}

uint16x8_t
tst_vextq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
  return __builtin_shuffle (__a, __b, __mask1);
}

uint16x8_t
tst_vextq_u16_rotate (uint16x8_t __a)
{
  uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
  return __builtin_shuffle (__a, __mask1);
}

uint32x4_t
tst_vextq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  uint32x4_t __mask1 = {1, 2, 3, 4};
  return __builtin_shuffle (__a, __b, __mask1);
}

uint32x4_t
tst_vextq_u32_rotate (uint32x4_t __a)
{
  uint32x4_t __mask1 = {1, 2, 3, 0};
  return __builtin_shuffle (__a, __mask1);
}

uint64x2_t
tst_vextq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  uint64x2_t __mask1 = {1, 2};
  return __builtin_shuffle (__a, __b, __mask1);
}

uint64x2_t
tst_vextq_u64_rotate (uint64x2_t __a)
{
  uint64x2_t __mask1 = {1, 0};
  return __builtin_shuffle (__a, __mask1);
}

int main (void)
{
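  /* Load each input with vld1*, run the shuffle under test, store the
     result back with vst1*, and compare every lane against the
     expected values, reporting the first mismatch before aborting.  */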
  uint8_t arr_u8x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint8_t arr2_u8x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
  uint16_t arr_u16x4[] = {0, 1, 2, 3};
  uint16_t arr2_u16x4[] = {4, 5, 6, 7};
  uint32_t arr_u32x2[] = {0, 1};
  uint32_t arr2_u32x2[] = {2, 3};
  uint8_t arr_u8x16[] = {0, 1, 2, 3, 4, 5, 6, 7,
                         8, 9, 10, 11, 12, 13, 14, 15};
  uint8_t arr2_u8x16[] = {16, 17, 18, 19, 20, 21, 22, 23,
                          24, 25, 26, 27, 28, 29, 30, 31};
  uint16_t arr_u16x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint16_t arr2_u16x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
  uint32_t arr_u32x4[] = {0, 1, 2, 3};
  uint32_t arr2_u32x4[] = {4, 5, 6, 7};
  uint64_t arr_u64x2[] = {0, 1};
  uint64_t arr2_u64x2[] = {2, 3};

  uint8_t expected_u8x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
  uint8_t expected_rot_u8x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
  uint16_t expected_u16x4[] = {2, 3, 4, 5};
  uint16_t expected_rot_u16x4[] = {2, 3, 0, 1};
  uint32_t expected_u32x2[] = {1, 2};
  uint32_t expected_rot_u32x2[] = {1, 0};
  uint8_t expected_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
                              12, 13, 14, 15, 16, 17, 18, 19};
  uint8_t expected_rot_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
                                  12, 13, 14, 15, 0, 1, 2, 3};
  uint16_t expected_u16x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
  uint16_t expected_rot_u16x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
  uint32_t expected_u32x4[] = {1, 2, 3, 4};
  uint32_t expected_rot_u32x4[] = {1, 2, 3, 0};
  uint64_t expected_u64x2[] = {1, 2};
  uint64_t expected_rot_u64x2[] = {1, 0};

  uint8x8_t vec_u8x8 = vld1_u8 (arr_u8x8);
  uint8x8_t vec2_u8x8 = vld1_u8 (arr2_u8x8);
  uint16x4_t vec_u16x4 = vld1_u16 (arr_u16x4);
  uint16x4_t vec2_u16x4 = vld1_u16 (arr2_u16x4);
  uint32x2_t vec_u32x2 = vld1_u32 (arr_u32x2);
  uint32x2_t vec2_u32x2 = vld1_u32 (arr2_u32x2);
  uint8x16_t vec_u8x16 = vld1q_u8 (arr_u8x16);
  uint8x16_t vec2_u8x16 = vld1q_u8 (arr2_u8x16);
  uint16x8_t vec_u16x8 = vld1q_u16 (arr_u16x8);
  uint16x8_t vec2_u16x8 = vld1q_u16 (arr2_u16x8);
  uint32x4_t vec_u32x4 = vld1q_u32 (arr_u32x4);
  uint32x4_t vec2_u32x4 = vld1q_u32 (arr2_u32x4);
  uint64x2_t vec_u64x2 = vld1q_u64 (arr_u64x2);
  uint64x2_t vec2_u64x2 = vld1q_u64 (arr2_u64x2);

  uint8x8_t result_u8x8;
  uint16x4_t result_u16x4;
  uint32x2_t result_u32x2;
  uint8x16_t result_u8x16;
  uint16x8_t result_u16x8;
  uint32x4_t result_u32x4;
  uint64x2_t result_u64x2;

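  /* Union buffers receive the results via vst1*; the vector member
     gives each buffer the vector type's alignment, and the
     element-typed array allows lane-by-lane comparison.  */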
  union {uint8x8_t v; uint8_t buf[8];} mem_u8x8;
  union {uint16x4_t v; uint16_t buf[4];} mem_u16x4;
  union {uint32x2_t v; uint32_t buf[2];} mem_u32x2;
  union {uint8x16_t v; uint8_t buf[16];} mem_u8x16;
  union {uint16x8_t v; uint16_t buf[8];} mem_u16x8;
  union {uint32x4_t v; uint32_t buf[4];} mem_u32x4;
  union {uint64x2_t v; uint64_t buf[2];} mem_u64x2;

  int i;

  result_u8x8 = tst_vext_u8 (vec_u8x8, vec2_u8x8);
  vst1_u8 (mem_u8x8.buf, result_u8x8);

  for (i = 0; i < 8; i++)
    if (mem_u8x8.buf[i] != expected_u8x8[i])
      {
        printf ("tst_vext_u8[%d]=%d expected %d\n",
                i, mem_u8x8.buf[i], expected_u8x8[i]);
        abort ();
      }

  result_u8x8 = tst_vext_u8_rotate (vec_u8x8);
  vst1_u8 (mem_u8x8.buf, result_u8x8);

  for (i = 0; i < 8; i++)
    if (mem_u8x8.buf[i] != expected_rot_u8x8[i])
      {
        printf ("tst_vext_u8_rotate[%d]=%d expected %d\n",
                i, mem_u8x8.buf[i], expected_rot_u8x8[i]);
        abort ();
      }

  result_u16x4 = tst_vext_u16 (vec_u16x4, vec2_u16x4);
  vst1_u16 (mem_u16x4.buf, result_u16x4);

  for (i = 0; i < 4; i++)
    if (mem_u16x4.buf[i] != expected_u16x4[i])
      {
        printf ("tst_vext_u16[%d]=%d expected %d\n",
                i, mem_u16x4.buf[i], expected_u16x4[i]);
        abort ();
      }

  result_u16x4 = tst_vext_u16_rotate (vec_u16x4);
  vst1_u16 (mem_u16x4.buf, result_u16x4);

  for (i = 0; i < 4; i++)
    if (mem_u16x4.buf[i] != expected_rot_u16x4[i])
      {
        printf ("tst_vext_u16_rotate[%d]=%d expected %d\n",
                i, mem_u16x4.buf[i], expected_rot_u16x4[i]);
        abort ();
      }

  result_u32x2 = tst_vext_u32 (vec_u32x2, vec2_u32x2);
  vst1_u32 (mem_u32x2.buf, result_u32x2);

  for (i = 0; i < 2; i++)
    if (mem_u32x2.buf[i] != expected_u32x2[i])
      {
        printf ("tst_vext_u32[%d]=%u expected %u\n",
                i, mem_u32x2.buf[i], expected_u32x2[i]);
        abort ();
      }

  result_u32x2 = tst_vext_u32_rotate (vec_u32x2);
  vst1_u32 (mem_u32x2.buf, result_u32x2);

  for (i = 0; i < 2; i++)
    if (mem_u32x2.buf[i] != expected_rot_u32x2[i])
      {
        printf ("tst_vext_u32_rotate[%d]=%u expected %u\n",
                i, mem_u32x2.buf[i], expected_rot_u32x2[i]);
        abort ();
      }

  result_u8x16 = tst_vextq_u8 (vec_u8x16, vec2_u8x16);
  vst1q_u8 (mem_u8x16.buf, result_u8x16);

  for (i = 0; i < 16; i++)
    if (mem_u8x16.buf[i] != expected_u8x16[i])
      {
        printf ("tst_vextq_u8[%d]=%d expected %d\n",
                i, mem_u8x16.buf[i], expected_u8x16[i]);
        abort ();
      }

  result_u8x16 = tst_vextq_u8_rotate (vec_u8x16);
  vst1q_u8 (mem_u8x16.buf, result_u8x16);

  for (i = 0; i < 16; i++)
    if (mem_u8x16.buf[i] != expected_rot_u8x16[i])
      {
        printf ("tst_vextq_u8_rotate[%d]=%d expected %d\n",
                i, mem_u8x16.buf[i], expected_rot_u8x16[i]);
        abort ();
      }

  result_u16x8 = tst_vextq_u16 (vec_u16x8, vec2_u16x8);
  vst1q_u16 (mem_u16x8.buf, result_u16x8);

  for (i = 0; i < 8; i++)
    if (mem_u16x8.buf[i] != expected_u16x8[i])
      {
        printf ("tst_vextq_u16[%d]=%d expected %d\n",
                i, mem_u16x8.buf[i], expected_u16x8[i]);
        abort ();
      }

  result_u16x8 = tst_vextq_u16_rotate (vec_u16x8);
  vst1q_u16 (mem_u16x8.buf, result_u16x8);

  for (i = 0; i < 8; i++)
    if (mem_u16x8.buf[i] != expected_rot_u16x8[i])
      {
        printf ("tst_vextq_u16_rotate[%d]=%d expected %d\n",
                i, mem_u16x8.buf[i], expected_rot_u16x8[i]);
        abort ();
      }

  result_u32x4 = tst_vextq_u32 (vec_u32x4, vec2_u32x4);
  vst1q_u32 (mem_u32x4.buf, result_u32x4);

  for (i = 0; i < 4; i++)
    if (mem_u32x4.buf[i] != expected_u32x4[i])
      {
        printf ("tst_vextq_u32[%d]=%u expected %u\n",
                i, mem_u32x4.buf[i], expected_u32x4[i]);
        abort ();
      }

  result_u32x4 = tst_vextq_u32_rotate (vec_u32x4);
  vst1q_u32 (mem_u32x4.buf, result_u32x4);

  for (i = 0; i < 4; i++)
    if (mem_u32x4.buf[i] != expected_rot_u32x4[i])
      {
        printf ("tst_vextq_u32_rotate[%d]=%u expected %u\n",
                i, mem_u32x4.buf[i], expected_rot_u32x4[i]);
        abort ();
      }

  result_u64x2 = tst_vextq_u64 (vec_u64x2, vec2_u64x2);
  vst1q_u64 (mem_u64x2.buf, result_u64x2);

  for (i = 0; i < 2; i++)
    if (mem_u64x2.buf[i] != expected_u64x2[i])
      {
        printf ("tst_vextq_u64[%d]=%llu expected %llu\n",
                i, (unsigned long long) mem_u64x2.buf[i],
                (unsigned long long) expected_u64x2[i]);
        abort ();
      }

  result_u64x2 = tst_vextq_u64_rotate (vec_u64x2);
  vst1q_u64 (mem_u64x2.buf, result_u64x2);

  for (i = 0; i < 2; i++)
    if (mem_u64x2.buf[i] != expected_rot_u64x2[i])
      {
        printf ("tst_vextq_u64_rotate[%d]=%llu expected %llu\n",
                i, (unsigned long long) mem_u64x2.buf[i],
                (unsigned long long) expected_rot_u64x2[i]);
        abort ();
      }

  return 0;
}