1 /* { dg-do run } */
2 /* { dg-require-effective-target arm_neon_ok } */
3 /* { dg-require-effective-target arm_little_endian } */
4 /* { dg-options "-O2" } */
5 /* { dg-add-options arm_neon } */
6
7 #include <arm_neon.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10
/* Two-operand shuffle of 8-lane, 8-bit vectors.  The selector names lanes
   2..9 of the concatenation of __a and __b, so the result is lanes 2-7 of
   __a followed by lanes 0-1 of __b.  */
uint8x8_t
tst_vext_u8 (uint8x8_t __a, uint8x8_t __b)
{
  const uint8x8_t lane_sel = {2, 3, 4, 5, 6, 7, 8, 9};

  return __builtin_shuffle (__a, __b, lane_sel);
}
18
/* Single-operand shuffle of an 8-lane, 8-bit vector.  The selector rotates
   the lanes of __a down by two positions: lanes 2-7 come first, then lanes
   0-1 wrap around to the end.  */
uint8x8_t
tst_vext_u8_rotate (uint8x8_t __a)
{
  const uint8x8_t lane_sel = {2, 3, 4, 5, 6, 7, 0, 1};

  return __builtin_shuffle (__a, lane_sel);
}
25
/* Two-operand shuffle of 4-lane, 16-bit vectors.  The selector names lanes
   2..5 of the concatenation of __a and __b, so the result is lanes 2-3 of
   __a followed by lanes 0-1 of __b.  */
uint16x4_t
tst_vext_u16 (uint16x4_t __a, uint16x4_t __b)
{
  const uint16x4_t lane_sel = {2, 3, 4, 5};

  return __builtin_shuffle (__a, __b, lane_sel);
}
32
/* Single-operand shuffle of a 4-lane, 16-bit vector.  The selector rotates
   the lanes of __a by two positions, which exchanges its two halves.  */
uint16x4_t
tst_vext_u16_rotate (uint16x4_t __a)
{
  const uint16x4_t lane_sel = {2, 3, 0, 1};

  return __builtin_shuffle (__a, lane_sel);
}
39
/* Two-operand shuffle of 2-lane, 32-bit vectors.  The selector names lanes
   1..2 of the concatenation of __a and __b, so the result is lane 1 of __a
   followed by lane 0 of __b.  */
uint32x2_t
tst_vext_u32 (uint32x2_t __a, uint32x2_t __b)
{
  const uint32x2_t lane_sel = {1, 2};

  return __builtin_shuffle (__a, __b, lane_sel);
}
46
/* Single-operand shuffle of a 2-lane, 32-bit vector that exchanges the two
   lanes of __a.  This one is mapped into vrev64.32.  */
uint32x2_t
tst_vext_u32_rotate (uint32x2_t __a)
{
  const uint32x2_t lane_sel = {1, 0};

  return __builtin_shuffle (__a, lane_sel);
}
54
/* Two-operand shuffle of 16-lane, 8-bit vectors.  The selector names lanes
   4..19 of the concatenation of __a and __b, so the result is lanes 4-15 of
   __a followed by lanes 0-3 of __b.  */
uint8x16_t
tst_vextq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  const uint8x16_t lane_sel = { 4,  5,  6,  7,  8,  9, 10, 11,
                               12, 13, 14, 15, 16, 17, 18, 19};

  return __builtin_shuffle (__a, __b, lane_sel);
}
62
/* Single-operand shuffle of a 16-lane, 8-bit vector.  The selector rotates
   the lanes of __a down by four positions: lanes 4-15 come first, then lanes
   0-3 wrap around to the end.  */
uint8x16_t
tst_vextq_u8_rotate (uint8x16_t __a)
{
  const uint8x16_t lane_sel = { 4,  5,  6,  7,  8,  9, 10, 11,
                               12, 13, 14, 15,  0,  1,  2,  3};

  return __builtin_shuffle (__a, lane_sel);
}
70
/* Two-operand shuffle of 8-lane, 16-bit vectors.  The selector names lanes
   2..9 of the concatenation of __a and __b, so the result is lanes 2-7 of
   __a followed by lanes 0-1 of __b.  */
uint16x8_t
tst_vextq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  const uint16x8_t lane_sel = {2, 3, 4, 5, 6, 7, 8, 9};

  return __builtin_shuffle (__a, __b, lane_sel);
}
77
/* Single-operand shuffle of an 8-lane, 16-bit vector.  The selector rotates
   the lanes of __a down by two positions: lanes 2-7 come first, then lanes
   0-1 wrap around to the end.  */
uint16x8_t
tst_vextq_u16_rotate (uint16x8_t __a)
{
  const uint16x8_t lane_sel = {2, 3, 4, 5, 6, 7, 0, 1};

  return __builtin_shuffle (__a, lane_sel);
}
84
/* Two-operand shuffle of 4-lane, 32-bit vectors.  The selector names lanes
   1..4 of the concatenation of __a and __b, so the result is lanes 1-3 of
   __a followed by lane 0 of __b.  */
uint32x4_t
tst_vextq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  const uint32x4_t lane_sel = {1, 2, 3, 4};

  return __builtin_shuffle (__a, __b, lane_sel);
}
91
/* Single-operand shuffle of a 4-lane, 32-bit vector.  The selector rotates
   the lanes of __a down by one position: lanes 1-3 come first, then lane 0
   wraps around to the end.  */
uint32x4_t
tst_vextq_u32_rotate (uint32x4_t __a)
{
  const uint32x4_t lane_sel = {1, 2, 3, 0};

  return __builtin_shuffle (__a, lane_sel);
}
98
/* Two-operand shuffle of 2-lane, 64-bit vectors.  The selector names lanes
   1..2 of the concatenation of __a and __b, so the result is lane 1 of __a
   followed by lane 0 of __b.  */
uint64x2_t
tst_vextq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  const uint64x2_t lane_sel = {1, 2};

  return __builtin_shuffle (__a, __b, lane_sel);
}
105
/* Single-operand shuffle of a 2-lane, 64-bit vector that exchanges the two
   lanes of __a.  */
uint64x2_t
tst_vextq_u64_rotate (uint64x2_t __a)
{
  const uint64x2_t lane_sel = {1, 0};

  return __builtin_shuffle (__a, lane_sel);
}
112
/* Store the NEON vector VEC into the lane buffer BUF with the store intrinsic
   STORE, then compare every lane against the array WANT.  On the first
   mismatch, print the offending lane labelled with the string literal NAME
   and abort.

   WANT must be a true array (not a pointer), because the lane count is
   derived from its size.  Lanes are widened to unsigned long long before
   printing so that one conversion specifier matches the argument type for
   every lane width used in this file (8 to 64 bits).  The original code
   passed uint32_t to %d and uint64_t to %lld.  */
#define CHECK_LANES(NAME, STORE, BUF, VEC, WANT) \
  do \
    { \
      int lane_; \
      STORE ((BUF), (VEC)); \
      for (lane_ = 0; \
           lane_ < (int) (sizeof (WANT) / sizeof ((WANT)[0])); \
           lane_++) \
        if ((BUF)[lane_] != (WANT)[lane_]) \
          { \
            printf (NAME "[%d]=%llu expected %llu\n", lane_, \
                    (unsigned long long) (BUF)[lane_], \
                    (unsigned long long) (WANT)[lane_]); \
            abort (); \
          } \
    } \
  while (0)

/* Run every tst_vext* shuffle on known inputs and abort on the first lane
   that differs from the expected permutation.  Returns 0 when all checks
   pass.  */
int
main (void)
{
  /* Input lanes.  The first operand of each width counts up from 0 and the
     second continues where the first stops, so each result lane value
     identifies the source lane it came from.  */
  uint8_t arr_u8x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint8_t arr2_u8x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
  uint16_t arr_u16x4[] = {0, 1, 2, 3};
  uint16_t arr2_u16x4[] = {4, 5, 6, 7};
  uint32_t arr_u32x2[] = {0, 1};
  uint32_t arr2_u32x2[] = {2, 3};
  uint8_t arr_u8x16[] = {0, 1, 2, 3, 4, 5, 6, 7,
                         8, 9, 10, 11, 12, 13, 14, 15};
  uint8_t arr2_u8x16[] = {16, 17, 18, 19, 20, 21, 22, 23,
                          24, 25, 26, 27, 28, 29, 30, 31};
  uint16_t arr_u16x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint16_t arr2_u16x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
  uint32_t arr_u32x4[] = {0, 1, 2, 3};
  uint32_t arr2_u32x4[] = {4, 5, 6, 7};
  uint64_t arr_u64x2[] = {0, 1};
  uint64_t arr2_u64x2[] = {2, 3};

  /* Expected lanes.  Each array equals the selector mask used by the
     corresponding tst_vext* function, given the inputs above.  */
  uint8_t expected_u8x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
  uint8_t expected_rot_u8x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
  uint16_t expected_u16x4[] = {2, 3, 4, 5};
  uint16_t expected_rot_u16x4[] = {2, 3, 0, 1};
  uint32_t expected_u32x2[] = {1, 2};
  uint32_t expected_rot_u32x2[] = {1, 0};
  uint8_t expected_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
                              12, 13, 14, 15, 16, 17, 18, 19};
  uint8_t expected_rot_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
                                  12, 13, 14, 15, 0, 1, 2, 3};
  uint16_t expected_u16x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
  uint16_t expected_rot_u16x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
  uint32_t expected_u32x4[] = {1, 2, 3, 4};
  uint32_t expected_rot_u32x4[] = {1, 2, 3, 0};
  uint64_t expected_u64x2[] = {1, 2};
  uint64_t expected_rot_u64x2[] = {1, 0};

  /* Load the inputs into NEON vectors.  */
  uint8x8_t vec_u8x8 = vld1_u8 (arr_u8x8);
  uint8x8_t vec2_u8x8 = vld1_u8 (arr2_u8x8);
  uint16x4_t vec_u16x4 = vld1_u16 (arr_u16x4);
  uint16x4_t vec2_u16x4 = vld1_u16 (arr2_u16x4);
  uint32x2_t vec_u32x2 = vld1_u32 (arr_u32x2);
  uint32x2_t vec2_u32x2 = vld1_u32 (arr2_u32x2);
  uint8x16_t vec_u8x16 = vld1q_u8 (arr_u8x16);
  uint8x16_t vec2_u8x16 = vld1q_u8 (arr2_u8x16);
  uint16x8_t vec_u16x8 = vld1q_u16 (arr_u16x8);
  uint16x8_t vec2_u16x8 = vld1q_u16 (arr2_u16x8);
  uint32x4_t vec_u32x4 = vld1q_u32 (arr_u32x4);
  uint32x4_t vec2_u32x4 = vld1q_u32 (arr2_u32x4);
  uint64x2_t vec_u64x2 = vld1q_u64 (arr_u64x2);
  uint64x2_t vec2_u64x2 = vld1q_u64 (arr2_u64x2);

  uint8x8_t result_u8x8;
  uint16x4_t result_u16x4;
  uint32x2_t result_u32x2;
  uint8x16_t result_u8x16;
  uint16x8_t result_u16x8;
  uint32x4_t result_u32x4;
  uint64x2_t result_u64x2;

  /* Scratch buffers that receive the stored result lanes.  Only the buf
     member is accessed below; the v member is never read or written here
     (presumably it is kept to give buf the vector type's alignment --
     TODO confirm).  */
  union {uint8x8_t v; uint8_t buf[8];} mem_u8x8;
  union {uint16x4_t v; uint16_t buf[4];} mem_u16x4;
  union {uint32x2_t v; uint32_t buf[2];} mem_u32x2;
  union {uint8x16_t v; uint8_t buf[16];} mem_u8x16;
  union {uint16x8_t v; uint16_t buf[8];} mem_u16x8;
  union {uint32x4_t v; uint32_t buf[4];} mem_u32x4;
  union {uint64x2_t v; uint64_t buf[2];} mem_u64x2;

  /* 64-bit (D register sized) vectors.  */
  result_u8x8 = tst_vext_u8 (vec_u8x8, vec2_u8x8);
  CHECK_LANES ("tst_vext_u8", vst1_u8, mem_u8x8.buf,
               result_u8x8, expected_u8x8);

  result_u8x8 = tst_vext_u8_rotate (vec_u8x8);
  CHECK_LANES ("tst_vext_u8_rotate", vst1_u8, mem_u8x8.buf,
               result_u8x8, expected_rot_u8x8);

  result_u16x4 = tst_vext_u16 (vec_u16x4, vec2_u16x4);
  CHECK_LANES ("tst_vext_u16", vst1_u16, mem_u16x4.buf,
               result_u16x4, expected_u16x4);

  result_u16x4 = tst_vext_u16_rotate (vec_u16x4);
  CHECK_LANES ("tst_vext_u16_rotate", vst1_u16, mem_u16x4.buf,
               result_u16x4, expected_rot_u16x4);

  result_u32x2 = tst_vext_u32 (vec_u32x2, vec2_u32x2);
  CHECK_LANES ("tst_vext_u32", vst1_u32, mem_u32x2.buf,
               result_u32x2, expected_u32x2);

  result_u32x2 = tst_vext_u32_rotate (vec_u32x2);
  CHECK_LANES ("tst_vext_u32_rotate", vst1_u32, mem_u32x2.buf,
               result_u32x2, expected_rot_u32x2);

  /* 128-bit (Q register sized) vectors.  */
  result_u8x16 = tst_vextq_u8 (vec_u8x16, vec2_u8x16);
  CHECK_LANES ("tst_vextq_u8", vst1q_u8, mem_u8x16.buf,
               result_u8x16, expected_u8x16);

  result_u8x16 = tst_vextq_u8_rotate (vec_u8x16);
  CHECK_LANES ("tst_vextq_u8_rotate", vst1q_u8, mem_u8x16.buf,
               result_u8x16, expected_rot_u8x16);

  result_u16x8 = tst_vextq_u16 (vec_u16x8, vec2_u16x8);
  CHECK_LANES ("tst_vextq_u16", vst1q_u16, mem_u16x8.buf,
               result_u16x8, expected_u16x8);

  result_u16x8 = tst_vextq_u16_rotate (vec_u16x8);
  CHECK_LANES ("tst_vextq_u16_rotate", vst1q_u16, mem_u16x8.buf,
               result_u16x8, expected_rot_u16x8);

  result_u32x4 = tst_vextq_u32 (vec_u32x4, vec2_u32x4);
  CHECK_LANES ("tst_vextq_u32", vst1q_u32, mem_u32x4.buf,
               result_u32x4, expected_u32x4);

  result_u32x4 = tst_vextq_u32_rotate (vec_u32x4);
  CHECK_LANES ("tst_vextq_u32_rotate", vst1q_u32, mem_u32x4.buf,
               result_u32x4, expected_rot_u32x4);

  result_u64x2 = tst_vextq_u64 (vec_u64x2, vec2_u64x2);
  CHECK_LANES ("tst_vextq_u64", vst1q_u64, mem_u64x2.buf,
               result_u64x2, expected_u64x2);

  result_u64x2 = tst_vextq_u64_rotate (vec_u64x2);
  CHECK_LANES ("tst_vextq_u64_rotate", vst1q_u64, mem_u64x2.buf,
               result_u64x2, expected_rot_u64x2);

  return 0;
}

#undef CHECK_LANES
341