1 /* { dg-do run } */
2 /* { dg-require-effective-target arm_neon_ok } */
3 /* { dg-require-effective-target arm_neon_hw } */
4 /* { dg-require-effective-target arm_little_endian } */
5 /* { dg-options "-O2" } */
6 /* { dg-add-options arm_neon } */
7
8 #include <arm_neon.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11
12 uint8x8_t
tst_vext_u8(uint8x8_t __a,uint8x8_t __b)13 tst_vext_u8 (uint8x8_t __a, uint8x8_t __b)
14 {
15 uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
16
17 return __builtin_shuffle ( __a, __b, __mask1) ;
18 }
19
20 uint8x8_t
tst_vext_u8_rotate(uint8x8_t __a)21 tst_vext_u8_rotate (uint8x8_t __a)
22 {
23 uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
24 return __builtin_shuffle ( __a, __mask1) ;
25 }
26
27 uint16x4_t
tst_vext_u16(uint16x4_t __a,uint16x4_t __b)28 tst_vext_u16 (uint16x4_t __a, uint16x4_t __b)
29 {
30 uint16x4_t __mask1 = {2, 3, 4, 5};
31 return __builtin_shuffle ( __a, __b, __mask1) ;
32 }
33
34 uint16x4_t
tst_vext_u16_rotate(uint16x4_t __a)35 tst_vext_u16_rotate (uint16x4_t __a)
36 {
37 uint16x4_t __mask1 = {2, 3, 0, 1};
38 return __builtin_shuffle ( __a, __mask1) ;
39 }
40
41 uint32x2_t
tst_vext_u32(uint32x2_t __a,uint32x2_t __b)42 tst_vext_u32 (uint32x2_t __a, uint32x2_t __b)
43 {
44 uint32x2_t __mask1 = {1, 2};
45 return __builtin_shuffle ( __a, __b, __mask1) ;
46 }
47
48 /* This one is mapped into vrev64.32. */
49 uint32x2_t
tst_vext_u32_rotate(uint32x2_t __a)50 tst_vext_u32_rotate (uint32x2_t __a)
51 {
52 uint32x2_t __mask1 = {1, 0};
53 return __builtin_shuffle ( __a, __mask1) ;
54 }
55
56 uint8x16_t
tst_vextq_u8(uint8x16_t __a,uint8x16_t __b)57 tst_vextq_u8 (uint8x16_t __a, uint8x16_t __b)
58 {
59 uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
60 12, 13, 14, 15, 16, 17, 18, 19};
61 return __builtin_shuffle ( __a, __b, __mask1) ;
62 }
63
64 uint8x16_t
tst_vextq_u8_rotate(uint8x16_t __a)65 tst_vextq_u8_rotate (uint8x16_t __a)
66 {
67 uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
68 12, 13, 14, 15, 0, 1, 2, 3};
69 return __builtin_shuffle ( __a, __mask1) ;
70 }
71
72 uint16x8_t
tst_vextq_u16(uint16x8_t __a,uint16x8_t __b)73 tst_vextq_u16 (uint16x8_t __a, uint16x8_t __b)
74 {
75 uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
76 return __builtin_shuffle ( __a, __b, __mask1) ;
77 }
78
79 uint16x8_t
tst_vextq_u16_rotate(uint16x8_t __a)80 tst_vextq_u16_rotate (uint16x8_t __a)
81 {
82 uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
83 return __builtin_shuffle ( __a, __mask1) ;
84 }
85
86 uint32x4_t
tst_vextq_u32(uint32x4_t __a,uint32x4_t __b)87 tst_vextq_u32 (uint32x4_t __a, uint32x4_t __b)
88 {
89 uint32x4_t __mask1 = {1, 2, 3, 4};
90 return __builtin_shuffle ( __a, __b, __mask1) ;
91 }
92
93 uint32x4_t
tst_vextq_u32_rotate(uint32x4_t __a)94 tst_vextq_u32_rotate (uint32x4_t __a)
95 {
96 uint32x4_t __mask1 = {1, 2, 3, 0};
97 return __builtin_shuffle ( __a, __mask1) ;
98 }
99
100 uint64x2_t
tst_vextq_u64(uint64x2_t __a,uint64x2_t __b)101 tst_vextq_u64 (uint64x2_t __a, uint64x2_t __b)
102 {
103 uint64x2_t __mask1 = {1, 2};
104 return __builtin_shuffle ( __a, __b, __mask1) ;
105 }
106
107 uint64x2_t
tst_vextq_u64_rotate(uint64x2_t __a)108 tst_vextq_u64_rotate (uint64x2_t __a)
109 {
110 uint64x2_t __mask1 = {1, 0};
111 return __builtin_shuffle ( __a, __mask1) ;
112 }
113
main(void)114 int main (void)
115 {
116 uint8_t arr_u8x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
117 uint8_t arr2_u8x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
118 uint16_t arr_u16x4[] = {0, 1, 2, 3};
119 uint16_t arr2_u16x4[] = {4, 5, 6, 7};
120 uint32_t arr_u32x2[] = {0, 1};
121 uint32_t arr2_u32x2[] = {2, 3};
122 uint8_t arr_u8x16[] = {0, 1, 2, 3, 4, 5, 6, 7,
123 8, 9, 10, 11, 12, 13, 14, 15};
124 uint8_t arr2_u8x16[] = {16, 17, 18, 19, 20, 21, 22, 23,
125 24, 25, 26, 27, 28, 29, 30, 31};
126 uint16_t arr_u16x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
127 uint16_t arr2_u16x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
128 uint32_t arr_u32x4[] = {0, 1, 2, 3};
129 uint32_t arr2_u32x4[] = {4, 5, 6, 7};
130 uint64_t arr_u64x2[] = {0, 1};
131 uint64_t arr2_u64x2[] = {2, 3};
132
133 uint8_t expected_u8x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
134 uint8_t expected_rot_u8x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
135 uint16_t expected_u16x4[] = {2, 3, 4, 5};
136 uint16_t expected_rot_u16x4[] = {2, 3, 0, 1};
137 uint32_t expected_u32x2[] = {1, 2};
138 uint32_t expected_rot_u32x2[] = {1, 0};
139 uint8_t expected_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
140 12, 13, 14, 15, 16, 17, 18, 19};
141 uint8_t expected_rot_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
142 12, 13, 14, 15, 0, 1, 2, 3,};
143 uint16_t expected_u16x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
144 uint16_t expected_rot_u16x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
145 uint32_t expected_u32x4[] = {1, 2, 3, 4};
146 uint32_t expected_rot_u32x4[] = {1, 2, 3, 0};
147 uint64_t expected_u64x2[] = {1, 2};
148 uint64_t expected_rot_u64x2[] = {1, 0};
149
150 uint8x8_t vec_u8x8 = vld1_u8 (arr_u8x8);
151 uint8x8_t vec2_u8x8 = vld1_u8 (arr2_u8x8);
152 uint16x4_t vec_u16x4 = vld1_u16 (arr_u16x4);
153 uint16x4_t vec2_u16x4 = vld1_u16 (arr2_u16x4);
154 uint32x2_t vec_u32x2 = vld1_u32 (arr_u32x2);
155 uint32x2_t vec2_u32x2 = vld1_u32 (arr2_u32x2);
156 uint8x16_t vec_u8x16 = vld1q_u8 (arr_u8x16);
157 uint8x16_t vec2_u8x16 = vld1q_u8 (arr2_u8x16);
158 uint16x8_t vec_u16x8 = vld1q_u16 (arr_u16x8);
159 uint16x8_t vec2_u16x8 = vld1q_u16 (arr2_u16x8);
160 uint32x4_t vec_u32x4 = vld1q_u32 (arr_u32x4);
161 uint32x4_t vec2_u32x4 = vld1q_u32 (arr2_u32x4);
162 uint64x2_t vec_u64x2 = vld1q_u64 (arr_u64x2);
163 uint64x2_t vec2_u64x2 = vld1q_u64 (arr2_u64x2);
164
165 uint8x8_t result_u8x8;
166 uint16x4_t result_u16x4;
167 uint32x2_t result_u32x2;
168 uint8x16_t result_u8x16;
169 uint16x8_t result_u16x8;
170 uint32x4_t result_u32x4;
171 uint64x2_t result_u64x2;
172
173 union {uint8x8_t v; uint8_t buf[8];} mem_u8x8;
174 union {uint16x4_t v; uint16_t buf[4];} mem_u16x4;
175 union {uint32x2_t v; uint32_t buf[2];} mem_u32x2;
176 union {uint8x16_t v; uint8_t buf[16];} mem_u8x16;
177 union {uint16x8_t v; uint16_t buf[8];} mem_u16x8;
178 union {uint32x4_t v; uint32_t buf[4];} mem_u32x4;
179 union {uint64x2_t v; uint64_t buf[2];} mem_u64x2;
180
181 int i;
182
183 result_u8x8 = tst_vext_u8 (vec_u8x8, vec2_u8x8);
184 vst1_u8 (mem_u8x8.buf, result_u8x8);
185
186 for (i=0; i<8; i++)
187 if (mem_u8x8.buf[i] != expected_u8x8[i])
188 {
189 printf ("tst_vext_u8[%d]=%d expected %d\n",
190 i, mem_u8x8.buf[i], expected_u8x8[i]);
191 abort ();
192 }
193
194 result_u8x8 = tst_vext_u8_rotate (vec_u8x8);
195 vst1_u8 (mem_u8x8.buf, result_u8x8);
196
197 for (i=0; i<8; i++)
198 if (mem_u8x8.buf[i] != expected_rot_u8x8[i])
199 {
200 printf ("tst_vext_u8_rotate[%d]=%d expected %d\n",
201 i, mem_u8x8.buf[i], expected_rot_u8x8[i]);
202 abort ();
203 }
204
205
206 result_u16x4 = tst_vext_u16 (vec_u16x4, vec2_u16x4);
207 vst1_u16 (mem_u16x4.buf, result_u16x4);
208
209 for (i=0; i<4; i++)
210 if (mem_u16x4.buf[i] != expected_u16x4[i])
211 {
212 printf ("tst_vext_u16[%d]=%d expected %d\n",
213 i, mem_u16x4.buf[i], expected_u16x4[i]);
214 abort ();
215 }
216
217 result_u16x4 = tst_vext_u16_rotate (vec_u16x4);
218 vst1_u16 (mem_u16x4.buf, result_u16x4);
219
220 for (i=0; i<4; i++)
221 if (mem_u16x4.buf[i] != expected_rot_u16x4[i])
222 {
223 printf ("tst_vext_u16_rotate[%d]=%d expected %d\n",
224 i, mem_u16x4.buf[i], expected_rot_u16x4[i]);
225 abort ();
226 }
227
228
229 result_u32x2 = tst_vext_u32 (vec_u32x2, vec2_u32x2);
230 vst1_u32 (mem_u32x2.buf, result_u32x2);
231
232 for (i=0; i<2; i++)
233 if (mem_u32x2.buf[i] != expected_u32x2[i])
234 {
235 printf ("tst_vext_u32[%d]=%d expected %d\n",
236 i, mem_u32x2.buf[i], expected_u32x2[i]);
237 abort ();
238 }
239
240 result_u32x2 = tst_vext_u32_rotate (vec_u32x2);
241 vst1_u32 (mem_u32x2.buf, result_u32x2);
242
243 for (i=0; i<2; i++)
244 if (mem_u32x2.buf[i] != expected_rot_u32x2[i])
245 {
246 printf ("tst_vext_u32_rotate[%d]=%d expected %d\n",
247 i, mem_u32x2.buf[i], expected_rot_u32x2[i]);
248 abort ();
249 }
250
251
252 result_u8x16 = tst_vextq_u8 (vec_u8x16, vec2_u8x16);
253 vst1q_u8 (mem_u8x16.buf, result_u8x16);
254
255 for (i=0; i<16; i++)
256 if (mem_u8x16.buf[i] != expected_u8x16[i])
257 {
258 printf ("tst_vextq_u8[%d]=%d expected %d\n",
259 i, mem_u8x16.buf[i], expected_u8x16[i]);
260 abort ();
261 }
262
263 result_u8x16 = tst_vextq_u8_rotate (vec_u8x16);
264 vst1q_u8 (mem_u8x16.buf, result_u8x16);
265
266 for (i=0; i<16; i++)
267 if (mem_u8x16.buf[i] != expected_rot_u8x16[i])
268 {
269 printf ("tst_vextq_u8_rotate[%d]=%d expected %d\n",
270 i, mem_u8x16.buf[i], expected_rot_u8x16[i]);
271 abort ();
272 }
273
274 result_u16x8 = tst_vextq_u16 (vec_u16x8, vec2_u16x8);
275 vst1q_u16 (mem_u16x8.buf, result_u16x8);
276
277 for (i=0; i<8; i++)
278 if (mem_u16x8.buf[i] != expected_u16x8[i])
279 {
280 printf ("tst_vextq_u16[%d]=%d expected %d\n",
281 i, mem_u16x8.buf[i], expected_u16x8[i]);
282 abort ();
283 }
284
285 result_u16x8 = tst_vextq_u16_rotate (vec_u16x8);
286 vst1q_u16 (mem_u16x8.buf, result_u16x8);
287
288 for (i=0; i<8; i++)
289 if (mem_u16x8.buf[i] != expected_rot_u16x8[i])
290 {
291 printf ("tst_vextq_u16_rotate[%d]=%d expected %d\n",
292 i, mem_u16x8.buf[i], expected_rot_u16x8[i]);
293 abort ();
294 }
295
296 result_u32x4 = tst_vextq_u32 (vec_u32x4, vec2_u32x4);
297 vst1q_u32 (mem_u32x4.buf, result_u32x4);
298
299 for (i=0; i<4; i++)
300 if (mem_u32x4.buf[i] != expected_u32x4[i])
301 {
302 printf ("tst_vextq_u32[%d]=%d expected %d\n",
303 i, mem_u32x4.buf[i], expected_u32x4[i]);
304 abort ();
305 }
306
307 result_u32x4 = tst_vextq_u32_rotate (vec_u32x4);
308 vst1q_u32 (mem_u32x4.buf, result_u32x4);
309
310 for (i=0; i<4; i++)
311 if (mem_u32x4.buf[i] != expected_rot_u32x4[i])
312 {
313 printf ("tst_vextq_u32_rotate[%d]=%d expected %d\n",
314 i, mem_u32x4.buf[i], expected_rot_u32x4[i]);
315 abort ();
316 }
317
318 result_u64x2 = tst_vextq_u64 (vec_u64x2, vec2_u64x2);
319 vst1q_u64 (mem_u64x2.buf, result_u64x2);
320
321 for (i=0; i<2; i++)
322 if (mem_u64x2.buf[i] != expected_u64x2[i])
323 {
324 printf ("tst_vextq_u64[%d]=%lld expected %lld\n",
325 i, mem_u64x2.buf[i], expected_u64x2[i]);
326 abort ();
327 }
328
329 result_u64x2 = tst_vextq_u64_rotate (vec_u64x2);
330 vst1q_u64 (mem_u64x2.buf, result_u64x2);
331
332 for (i=0; i<2; i++)
333 if (mem_u64x2.buf[i] != expected_rot_u64x2[i])
334 {
335 printf ("tst_vextq_u64_rotate[%d]=%lld expected %lld\n",
336 i, mem_u64x2.buf[i], expected_rot_u64x2[i]);
337 abort ();
338 }
339
340 return 0;
341 }
342